From 8b14782b5a937508eeba8625942c6afda8788e60 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Mar 31 2020 09:35:16 +0000 Subject: import qemu-kvm-ma-2.12.0-44.el7 --- diff --git a/SOURCES/kvm-Add-support-to-KVM_GET_MSR_FEATURE_INDEX_LIST-an.patch b/SOURCES/kvm-Add-support-to-KVM_GET_MSR_FEATURE_INDEX_LIST-an.patch new file mode 100644 index 0000000..056fb0d --- /dev/null +++ b/SOURCES/kvm-Add-support-to-KVM_GET_MSR_FEATURE_INDEX_LIST-an.patch @@ -0,0 +1,160 @@ +From 23dd23db9af68c2076239087f505c88b138ca409 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Tue, 4 Jun 2019 21:47:22 +0200 +Subject: [PATCH 03/23] kvm: Add support to KVM_GET_MSR_FEATURE_INDEX_LIST and + KVM_GET_MSRS system ioctl + +RH-Author: plai@redhat.com +Message-id: <1559684847-10889-4-git-send-email-plai@redhat.com> +Patchwork-id: 88531 +O-Subject: [RHEL7.7 qemu-kvm-rhev PATCH v4 3/8] kvm: Add support to KVM_GET_MSR_FEATURE_INDEX_LIST and KVM_GET_MSRS system ioctl +Bugzilla: 1709972 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Miroslav Rezanina + +From: Robert Hoo + +Add kvm_get_supported_feature_msrs() to get supported MSR feature index list. +Add kvm_arch_get_supported_msr_feature() to get each MSR features value. 
+ +Signed-off-by: Robert Hoo +Message-Id: <1539578845-37944-2-git-send-email-robert.hu@linux.intel.com> +Reviewed-by: Eduardo Habkost +Signed-off-by: Eduardo Habkost +(cherry picked from commit f57bceb6ab5163ddd6c41ff4344ab8cf28a9c63d) +Signed-off-by: Paul Lai +Signed-off-by: Miroslav Rezanina +--- + include/sysemu/kvm.h | 2 ++ + target/i386/kvm.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 82 insertions(+) + +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 23669c4..3d8f294 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -464,6 +464,8 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension); + + uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function, + uint32_t index, int reg); ++uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index); ++ + + void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 00f2141..0ecec4a 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -106,6 +106,7 @@ static int has_pit_state2; + static bool has_msr_mcg_ext_ctl; + + static struct kvm_cpuid2 *cpuid_cache; ++static struct kvm_msr_list *kvm_feature_msrs; + + int kvm_has_pit_state2(void) + { +@@ -405,6 +406,42 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + return ret; + } + ++uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) ++{ ++ struct { ++ struct kvm_msrs info; ++ struct kvm_msr_entry entries[1]; ++ } msr_data; ++ uint32_t ret; ++ ++ if (kvm_feature_msrs == NULL) { /* Host doesn't support feature MSRs */ ++ return 0; ++ } ++ ++ /* Check if requested MSR is supported feature MSR */ ++ int i; ++ for (i = 0; i < kvm_feature_msrs->nmsrs; i++) ++ if (kvm_feature_msrs->indices[i] == index) { ++ break; ++ } ++ if (i == kvm_feature_msrs->nmsrs) { ++ return 0; /* if the feature MSR is not supported, simply return 0 */ ++ } ++ ++ msr_data.info.nmsrs = 1; ++ 
msr_data.entries[0].index = index; ++ ++ ret = kvm_ioctl(s, KVM_GET_MSRS, &msr_data); ++ if (ret != 1) { ++ error_report("KVM get MSR (index=0x%x) feature failed, %s", ++ index, strerror(-ret)); ++ exit(1); ++ } ++ ++ return msr_data.entries[0].data; ++} ++ ++ + typedef struct HWPoisonPage { + ram_addr_t ram_addr; + QLIST_ENTRY(HWPoisonPage) list; +@@ -1164,6 +1201,47 @@ void kvm_arch_do_init_vcpu(X86CPU *cpu) + } + } + ++static int kvm_get_supported_feature_msrs(KVMState *s) ++{ ++ int ret = 0; ++ ++ if (kvm_feature_msrs != NULL) { ++ return 0; ++ } ++ ++ if (!kvm_check_extension(s, KVM_CAP_GET_MSR_FEATURES)) { ++ return 0; ++ } ++ ++ struct kvm_msr_list msr_list; ++ ++ msr_list.nmsrs = 0; ++ ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, &msr_list); ++ if (ret < 0 && ret != -E2BIG) { ++ error_report("Fetch KVM feature MSR list failed: %s", ++ strerror(-ret)); ++ return ret; ++ } ++ ++ assert(msr_list.nmsrs > 0); ++ kvm_feature_msrs = (struct kvm_msr_list *) \ ++ g_malloc0(sizeof(msr_list) + ++ msr_list.nmsrs * sizeof(msr_list.indices[0])); ++ ++ kvm_feature_msrs->nmsrs = msr_list.nmsrs; ++ ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, kvm_feature_msrs); ++ ++ if (ret < 0) { ++ error_report("Fetch KVM feature MSR list failed: %s", ++ strerror(-ret)); ++ g_free(kvm_feature_msrs); ++ kvm_feature_msrs = NULL; ++ return ret; ++ } ++ ++ return 0; ++} ++ + static int kvm_get_supported_msrs(KVMState *s) + { + static int kvm_supported_msrs; +@@ -1320,6 +1398,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + return ret; + } + ++ kvm_get_supported_feature_msrs(s); ++ + uname(&utsname); + lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-Fix-heap-overflow-in-ip_reass-on-big-packet-input.patch b/SOURCES/kvm-Fix-heap-overflow-in-ip_reass-on-big-packet-input.patch index ddc256f..9ca9864 100644 --- a/SOURCES/kvm-Fix-heap-overflow-in-ip_reass-on-big-packet-input.patch +++ 
b/SOURCES/kvm-Fix-heap-overflow-in-ip_reass-on-big-packet-input.patch @@ -1,19 +1,19 @@ -From 1cde1aa618f1ef1f847f192c86cef52446d6546d Mon Sep 17 00:00:00 2001 +From 903fa86ac4e6c03d135777213f06943bbc2ffd16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Thu, 1 Aug 2019 16:43:56 +0200 +Date: Wed, 31 Jul 2019 20:13:05 +0200 Subject: [PATCH] Fix heap overflow in ip_reass on big packet input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Philippe Mathieu-Daudé -Message-id: <20190801164356.14992-2-philmd@redhat.com> -Patchwork-id: 89857 -O-Subject: [RHEL-7.7 qemu-kvm-ma PATCH 1/1] Fix heap overflow in ip_reass on big packet input -Bugzilla: 1735652 -RH-Acked-by: Marc-André Lureau +Message-id: <20190731201305.28657-2-philmd@redhat.com> +Patchwork-id: 89840 +O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 1/1] Fix heap overflow in ip_reass on big packet input +Bugzilla: 1734753 1735653 RH-Acked-by: Stefan Hajnoczi RH-Acked-by: Thomas Huth +RH-Acked-by: Marc-André Lureau From: Samuel Thibault diff --git a/SOURCES/kvm-Revert-mc146818rtc-fix-timer-interrupt-reinjection.patch b/SOURCES/kvm-Revert-mc146818rtc-fix-timer-interrupt-reinjection.patch new file mode 100644 index 0000000..75b33c6 --- /dev/null +++ b/SOURCES/kvm-Revert-mc146818rtc-fix-timer-interrupt-reinjection.patch @@ -0,0 +1,121 @@ +From 8af16b9722f5bdeacf3a30c21490846e24b989b2 Mon Sep 17 00:00:00 2001 +From: Marcelo Tosatti +Date: Wed, 4 Dec 2019 15:21:08 +0100 +Subject: [PATCH 2/3] Revert "mc146818rtc: fix timer interrupt reinjection" + +RH-Author: Marcelo Tosatti +Message-id: <20191204152436.753293175@amt.cnet> +Patchwork-id: 92888 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH 2/3] Revert "mc146818rtc: fix timer interrupt reinjection" +Bugzilla: 1639098 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Vitaly Kuznetsov + +BZ: 1639098 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=24854309 +BRANCH: 
rhv7/master-2.12.0 +Upstream: 3ae32adff17226bc6a5f3fd7bb9804e6779e0660 +of pbonzini's for-upstream tree. + +This reverts commit b429de730174b388ea5760e3debb0d542ea3c261, except +that the reversal of the outer "if (period)" is left in. + +Signed-off-by: Paolo Bonzini +Signed-off-by: Marcelo Tosatti +Signed-off-by: Miroslav Rezanina +--- + hw/timer/mc146818rtc.c | 67 +++++++++++++++++++++++++------------------------- + 1 file changed, 33 insertions(+), 34 deletions(-) + +diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c +index 296d974..d848911 100644 +--- a/hw/timer/mc146818rtc.c ++++ b/hw/timer/mc146818rtc.c +@@ -196,7 +196,6 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period) + int64_t cur_clock, next_irq_clock, lost_clock = 0; + + period = rtc_periodic_clock_ticks(s); +- + if (!period) { + s->irq_coalesced = 0; + timer_del(s->periodic_timer); +@@ -219,42 +218,42 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period) + last_periodic_clock = next_periodic_clock - old_period; + lost_clock = cur_clock - last_periodic_clock; + assert(lost_clock >= 0); ++ } + ++ /* ++ * s->irq_coalesced can change for two reasons: ++ * ++ * a) if one or more periodic timer interrupts have been lost, ++ * lost_clock will be more that a period. ++ * ++ * b) when the period may be reconfigured, we expect the OS to ++ * treat delayed tick as the new period. So, when switching ++ * from a shorter to a longer period, scale down the missing, ++ * because the OS will treat past delayed ticks as longer ++ * (leftovers are put back into lost_clock). When switching ++ * to a shorter period, scale up the missing ticks since the ++ * OS handler will treat past delayed ticks as shorter. 
++ */ ++ if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { ++ uint32_t old_irq_coalesced = s->irq_coalesced; ++ ++ s->period = period; ++ lost_clock += old_irq_coalesced * old_period; ++ s->irq_coalesced = lost_clock / s->period; ++ lost_clock %= s->period; ++ if (old_irq_coalesced != s->irq_coalesced || ++ old_period != s->period) { ++ DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, " ++ "period scaled from %d to %d\n", old_irq_coalesced, ++ s->irq_coalesced, old_period, s->period); ++ rtc_coalesced_timer_update(s); ++ } ++ } else { + /* +- * s->irq_coalesced can change for two reasons: +- * +- * a) if one or more periodic timer interrupts have been lost, +- * lost_clock will be more that a period. +- * +- * b) when the period may be reconfigured, we expect the OS to +- * treat delayed tick as the new period. So, when switching +- * from a shorter to a longer period, scale down the missing, +- * because the OS will treat past delayed ticks as longer +- * (leftovers are put back into lost_clock). When switching +- * to a shorter period, scale up the missing ticks since the +- * OS handler will treat past delayed ticks as shorter. ++ * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW ++ * is not used, we should make the time progress anyway. + */ +- if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { +- uint32_t old_irq_coalesced = s->irq_coalesced; +- +- s->period = period; +- lost_clock += old_irq_coalesced * old_period; +- s->irq_coalesced = lost_clock / s->period; +- lost_clock %= s->period; +- if (old_irq_coalesced != s->irq_coalesced || +- old_period != s->period) { +- DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, " +- "period scaled from %d to %d\n", old_irq_coalesced, +- s->irq_coalesced, old_period, s->period); +- rtc_coalesced_timer_update(s); +- } +- } else { +- /* +- * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW +- * is not used, we should make the time progress anyway. 
+- */ +- lost_clock = MIN(lost_clock, period); +- } ++ lost_clock = MIN(lost_clock, period); + } + + assert(lost_clock >= 0 && lost_clock <= period); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-Use-KVM_GET_MSR_INDEX_LIST-for-MSR_IA32_ARCH_CAP.patch b/SOURCES/kvm-Use-KVM_GET_MSR_INDEX_LIST-for-MSR_IA32_ARCH_CAP.patch new file mode 100644 index 0000000..690996c --- /dev/null +++ b/SOURCES/kvm-Use-KVM_GET_MSR_INDEX_LIST-for-MSR_IA32_ARCH_CAP.patch @@ -0,0 +1,76 @@ +From d8ed655838cc87d703e1a1419bd9704fceae6a6c Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Tue, 4 Jun 2019 21:47:25 +0200 +Subject: [PATCH 06/23] kvm: Use KVM_GET_MSR_INDEX_LIST for + MSR_IA32_ARCH_CAPABILITIES support + +RH-Author: plai@redhat.com +Message-id: <1559684847-10889-7-git-send-email-plai@redhat.com> +Patchwork-id: 88529 +O-Subject: [RHEL7.7 qemu-kvm-rhev PATCH v4 6/8] kvm: Use KVM_GET_MSR_INDEX_LIST for MSR_IA32_ARCH_CAPABILITIES support +Bugzilla: 1709972 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Miroslav Rezanina + +From: Bandan Das + +When writing to guest's MSR_IA32_ARCH_CAPABILITIES, check whether it's +supported in the guest using the KVM_GET_MSR_INDEX_LIST ioctl. 
+ +Fixes: d86f963694df27f11b3681ffd225c9362de1b634 +Suggested-by: Eduardo Habkost +Tested-by: balducci@units.it +Signed-off-by: Bandan Das +Message-Id: +Signed-off-by: Eduardo Habkost +(cherry picked from commit aec5e9c3a94cf8b7920f59bef69a6f426092c4a0) +Signed-off-by: Paul Lai +Signed-off-by: Miroslav Rezanina +--- + target/i386/kvm.c | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 88a4114..c99c0ef 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -94,6 +94,7 @@ static bool has_msr_xss; + static bool has_msr_spec_ctrl; + static bool has_msr_virt_ssbd; + static bool has_msr_smi_count; ++static bool has_msr_arch_capabs; + + static uint32_t has_architectural_pmu_version; + static uint32_t num_architectural_pmu_gp_counters; +@@ -1330,6 +1331,9 @@ static int kvm_get_supported_msrs(KVMState *s) + case MSR_VIRT_SSBD: + has_msr_virt_ssbd = true; + break; ++ case MSR_IA32_ARCH_CAPABILITIES: ++ has_msr_arch_capabs = true; ++ break; + } + } + } +@@ -1834,14 +1838,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + #endif + + /* If host supports feature MSR, write down. 
*/ +- if (kvm_feature_msrs) { +- int i; +- for (i = 0; i < kvm_feature_msrs->nmsrs; i++) +- if (kvm_feature_msrs->indices[i] == MSR_IA32_ARCH_CAPABILITIES) { +- kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, +- env->features[FEAT_ARCH_CAPABILITIES]); +- break; +- } ++ if (has_msr_arch_capabs) { ++ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, ++ env->features[FEAT_ARCH_CAPABILITIES]); + } + + /* +-- +1.8.3.1 + diff --git a/SOURCES/kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch b/SOURCES/kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch new file mode 100644 index 0000000..0f8a77e --- /dev/null +++ b/SOURCES/kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch @@ -0,0 +1,61 @@ +From 3f9fbb23959f82de389fa43848bb28cd2b80a4bb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 6 Sep 2019 14:00:34 +0200 +Subject: [PATCH 1/4] Using ip_deq after m_free might read pointers from an + allocation reuse. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20190906140034.19722-2-philmd@redhat.com> +Patchwork-id: 90306 +O-Subject: [RHEL-7.7 qemu-kvm-ma + RHEL-7.7 qemu-kvm-rhev + RHEL-8.1.0 qemu-kvm PATCH 1/1] Using ip_deq after m_free might read pointers from an allocation reuse. +Bugzilla: 1749723 +RH-Acked-by: Thomas Huth +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +From: Samuel Thibault + +This would be difficult to exploit, but that is still related with +CVE-2019-14378 which generates fragmented IP packets that would trigger this +issue and at least produce a DoS. 
+ +Signed-off-by: Samuel Thibault +(cherry picked from libslirp commit c59279437eda91841b9d26079c70b8a540d41204) +Signed-off-by: Philippe Mathieu-Daudé + +Signed-off-by: Miroslav Rezanina +--- + slirp/ip_input.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/slirp/ip_input.c b/slirp/ip_input.c +index 07d8808..7cf0133 100644 +--- a/slirp/ip_input.c ++++ b/slirp/ip_input.c +@@ -300,6 +300,7 @@ ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) + */ + while (q != (struct ipasfrag*)&fp->frag_link && + ip->ip_off + ip->ip_len > q->ipf_off) { ++ struct ipasfrag *prev; + i = (ip->ip_off + ip->ip_len) - q->ipf_off; + if (i < q->ipf_len) { + q->ipf_len -= i; +@@ -307,9 +308,10 @@ ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) + m_adj(dtom(slirp, q), i); + break; + } ++ prev = q; + q = q->ipf_next; +- m_free(dtom(slirp, q->ipf_prev)); +- ip_deq(q->ipf_prev); ++ ip_deq(prev); ++ m_free(dtom(slirp, prev)); + } + + insert: +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Add-BDRV_REQ_NO_FALLBACK.patch b/SOURCES/kvm-block-Add-BDRV_REQ_NO_FALLBACK.patch new file mode 100644 index 0000000..81a2375 --- /dev/null +++ b/SOURCES/kvm-block-Add-BDRV_REQ_NO_FALLBACK.patch @@ -0,0 +1,109 @@ +From 5ad7c32387034a02c9a932018e60580872431db9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 5 Jun 2019 13:56:58 +0200 +Subject: [PATCH 10/23] block: Add BDRV_REQ_NO_FALLBACK + +RH-Author: Maxim Levitsky +Message-id: <20190605135705.24526-3-mlevitsk@redhat.com> +Patchwork-id: 88557 +O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 2/9] block: Add BDRV_REQ_NO_FALLBACK +Bugzilla: 1648622 +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: John Snow + +From: Kevin Wolf + +For qemu-img convert, we want an operation that zeroes out the whole +image if this can be done efficiently, but that returns an error +otherwise so we don't write explicit zeroes and immediately overwrite +them with the real data, potentially doubling the amount of data to be 
+written. + +Signed-off-by: Kevin Wolf +Acked-by: Eric Blake + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1648622 + +Signed-off-by: Maxim Levitsky +(Cherry picked from fe0480d6294270ff0d6fb60e66bb725a6aad2043) + +Signed-off-by: Miroslav Rezanina +--- + block/io.c | 12 +++++++++++- + include/block/block.h | 7 ++++++- + 2 files changed, 17 insertions(+), 2 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 18bf3c2..26c4075 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1029,6 +1029,7 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, + unsigned int nb_sectors; + + assert(!(flags & ~BDRV_REQ_MASK)); ++ assert(!(flags & BDRV_REQ_NO_FALLBACK)); + + if (!drv) { + return -ENOMEDIUM; +@@ -1074,6 +1075,7 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, + int ret; + + assert(!(flags & ~BDRV_REQ_MASK)); ++ assert(!(flags & BDRV_REQ_NO_FALLBACK)); + + if (!drv) { + return -ENOMEDIUM; +@@ -1499,6 +1501,10 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, + return -ENOMEDIUM; + } + ++ if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) { ++ return -ENOTSUP; ++ } ++ + assert(alignment % bs->bl.request_alignment == 0); + head = offset % alignment; + tail = (offset + bytes) % alignment; +@@ -1542,7 +1548,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, + assert(!bs->supported_zero_flags); + } + +- if (ret == -ENOTSUP) { ++ if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) { + /* Fall back to bounce buffer if write zeroes is unsupported */ + BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE; + +@@ -2973,6 +2979,10 @@ static int coroutine_fn bdrv_co_copy_range_internal( + BdrvTrackedRequest req; + int ret; + ++ /* TODO We can support BDRV_REQ_NO_FALLBACK here */ ++ assert(!(read_flags & BDRV_REQ_NO_FALLBACK)); ++ assert(!(write_flags & BDRV_REQ_NO_FALLBACK)); ++ + if (!dst || !dst->bs) { + return -ENOMEDIUM; + } +diff --git a/include/block/block.h 
b/include/block/block.h +index 5f40140..33fb60c 100644 +--- a/include/block/block.h ++++ b/include/block/block.h +@@ -82,8 +82,13 @@ typedef enum { + */ + BDRV_REQ_SERIALISING = 0x80, + ++ /* Execute the request only if the operation can be offloaded or otherwise ++ * be executed efficiently, but return an error instead of using a slow ++ * fallback. */ ++ BDRV_REQ_NO_FALLBACK = 0x100, ++ + /* Mask of valid flags */ +- BDRV_REQ_MASK = 0xff, ++ BDRV_REQ_MASK = 0x1ff, + } BdrvRequestFlags; + + typedef struct BlockSizes { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Advertise-BDRV_REQ_NO_FALLBACK-in-filter-drive.patch b/SOURCES/kvm-block-Advertise-BDRV_REQ_NO_FALLBACK-in-filter-drive.patch new file mode 100644 index 0000000..8fade5d --- /dev/null +++ b/SOURCES/kvm-block-Advertise-BDRV_REQ_NO_FALLBACK-in-filter-drive.patch @@ -0,0 +1,99 @@ +From f8516c7e4c09fce49f49d065a1facd6240e9c8c3 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 5 Jun 2019 13:56:59 +0200 +Subject: [PATCH 11/23] block: Advertise BDRV_REQ_NO_FALLBACK in filter drivers + +RH-Author: Maxim Levitsky +Message-id: <20190605135705.24526-4-mlevitsk@redhat.com> +Patchwork-id: 88559 +O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 3/9] block: Advertise BDRV_REQ_NO_FALLBACK in filter drivers +Bugzilla: 1648622 +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: John Snow + +From: Kevin Wolf + +Filter drivers that support .bdrv_co_pwrite_zeroes can safely advertise +BDRV_REQ_NO_FALLBACK because they just forward the request flags to +their child node. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Eric Blake + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1648622 + +Signed-off-by: Maxim Levitsky +(Cherry picked from 80f5c33ff31eb9333f5036ee278fb1483fb4ff41 with some conflicts) + +Signed-off-by: Miroslav Rezanina +--- + block/blkdebug.c | 2 +- + block/copy-on-read.c | 7 +++---- + block/mirror.c | 3 ++- + block/raw-format.c | 2 +- + 4 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/block/blkdebug.c b/block/blkdebug.c +index 526af2a..bb38e70 100644 +--- a/block/blkdebug.c ++++ b/block/blkdebug.c +@@ -401,7 +401,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | +- ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & ++ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + bs->file->bs->supported_zero_flags); + ret = -EINVAL; + +diff --git a/block/copy-on-read.c b/block/copy-on-read.c +index 1dcdaee..dfa40a9 100644 +--- a/block/copy-on-read.c ++++ b/block/copy-on-read.c +@@ -34,12 +34,11 @@ static int cor_open(BlockDriverState *bs, QDict *options, int flags, + } + + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | +- (BDRV_REQ_FUA & +- bs->file->bs->supported_write_flags); ++ (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | +- ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & +- bs->file->bs->supported_zero_flags); ++ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & ++ bs->file->bs->supported_zero_flags); + + return 0; + } +diff --git a/block/mirror.c b/block/mirror.c +index 8658873..55dc94f 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -1175,7 +1175,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, + } + mirror_top_bs->total_sectors = bs->total_sectors; + mirror_top_bs->supported_write_flags = 
BDRV_REQ_WRITE_UNCHANGED; +- mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED; ++ mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | ++ BDRV_REQ_NO_FALLBACK; + bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs)); + + /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep +diff --git a/block/raw-format.c b/block/raw-format.c +index a359198..f5d26cd 100644 +--- a/block/raw-format.c ++++ b/block/raw-format.c +@@ -432,7 +432,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | +- ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & ++ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + bs->file->bs->supported_zero_flags); + + if (bs->probed && !bdrv_is_read_only(bs)) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Drain-source-node-in-bdrv_replace_node.patch b/SOURCES/kvm-block-Drain-source-node-in-bdrv_replace_node.patch new file mode 100644 index 0000000..78689de --- /dev/null +++ b/SOURCES/kvm-block-Drain-source-node-in-bdrv_replace_node.patch @@ -0,0 +1,66 @@ +From b8a78d2f99bb481e78686ef2e0beb369b5563d87 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 3 Sep 2019 15:28:28 +0200 +Subject: [PATCH 23/23] block: Drain source node in bdrv_replace_node() + +RH-Author: Kevin Wolf +Message-id: <20190903152828.15668-2-kwolf@redhat.com> +Patchwork-id: 90257 +O-Subject: [RHEL-7.7.z qemu-kvm PATCH 1/1] block: Drain source node in bdrv_replace_node() +Bugzilla: 1711643 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Pankaj Gupta + +Instead of just asserting that no requests are in flight in +bdrv_replace_node(), which is a requirement that most callers ignore, we +can just drain the source node right there. 
This fixes at least starting +a commit job while I/O is active on the backing chain, but probably +other callers, too. + +Having requests in flight on the target node isn't a problem because the +target just gets new parents, but the call path of running requests +isn't modified. So we can just drop this assertion without a replacement. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1711643 +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +(cherry picked from commit f871abd60f4b67547e62c57c9bec19420052be39) +Signed-off-by: Kevin Wolf +Signed-off-by: Miroslav Rezanina +--- + block.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/block.c b/block.c +index 7061f9b..4986516 100644 +--- a/block.c ++++ b/block.c +@@ -3505,13 +3505,13 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, + uint64_t perm = 0, shared = BLK_PERM_ALL; + int ret; + +- assert(!atomic_read(&from->in_flight)); +- assert(!atomic_read(&to->in_flight)); +- + /* Make sure that @from doesn't go away until we have successfully attached + * all of its parents to @to. 
*/ + bdrv_ref(from); + ++ assert(qemu_get_current_aio_context() == qemu_get_aio_context()); ++ bdrv_drained_begin(from); ++ + /* Put all parents into @list and calculate their cumulative permissions */ + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + if (!should_update_child(c, to)) { +@@ -3546,6 +3546,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, + + out: + g_slist_free(list); ++ bdrv_drained_end(from); + bdrv_unref(from); + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Remove-error-messages-in-bdrv_make_zero.patch b/SOURCES/kvm-block-Remove-error-messages-in-bdrv_make_zero.patch new file mode 100644 index 0000000..424b8f8 --- /dev/null +++ b/SOURCES/kvm-block-Remove-error-messages-in-bdrv_make_zero.patch @@ -0,0 +1,60 @@ +From 8403396f05aa1e54198433409a6db7924eb53969 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 5 Jun 2019 13:56:57 +0200 +Subject: [PATCH 09/23] block: Remove error messages in bdrv_make_zero() + +RH-Author: Maxim Levitsky +Message-id: <20190605135705.24526-2-mlevitsk@redhat.com> +Patchwork-id: 88564 +O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 1/9] block: Remove error messages in bdrv_make_zero() +Bugzilla: 1648622 +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: John Snow + +From: Kevin Wolf + +There is only a single caller of bdrv_make_zero(), which is qemu-img +convert. If the function fails, we just fall back to a different method +of zeroing out blocks on the target image. There is no good reason to +print error messages on stderr when the higher level operation will +actually succeed. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Eric Blake + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1648622 + +Signed-off-by: Maxim Levitsky +(Cherry picked from 48ce986096bb70354b12f0becb253a06bcf9c434) + +Signed-off-by: Miroslav Rezanina +--- + block/io.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 7a99f7b..18bf3c2 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -913,8 +913,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) + } + ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL); + if (ret < 0) { +- error_report("error getting block status at offset %" PRId64 ": %s", +- offset, strerror(-ret)); + return ret; + } + if (ret & BDRV_BLOCK_ZERO) { +@@ -923,8 +921,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) + } + ret = bdrv_pwrite_zeroes(child, offset, bytes, flags); + if (ret < 0) { +- error_report("error writing zeroes at offset %" PRId64 ": %s", +- offset, strerror(-ret)); + return ret; + } + offset += bytes; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch b/SOURCES/kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch new file mode 100644 index 0000000..b6330b8 --- /dev/null +++ b/SOURCES/kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch @@ -0,0 +1,107 @@ +From a87f7f01dbdf6d87d6cd0f1a2e48341d4b2269ba Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 13 Sep 2019 14:12:25 +0200 +Subject: [PATCH 2/4] block/create: Do not abort if a block driver is not + available +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20190913141225.12022-2-philmd@redhat.com> +Patchwork-id: 90451 +O-Subject: [RHEL-7.7 qemu-kvm-rhev + RHEL-AV-8.1.0 qemu-kvm PATCH v2 1/1] block/create: Do not abort if a block driver is not available +Bugzilla: 1746224 +RH-Acked-by: Kevin Wolf +RH-Acked-by: John 
Snow +RH-Acked-by: Stefan Hajnoczi + +The 'blockdev-create' QMP command was introduced as experimental +feature in commit b0292b851b8, using the assert() debug call. +It got promoted to 'stable' command in 3fb588a0f2c, but the +assert call was not removed. + +Some block drivers are optional, and bdrv_find_format() might +return a NULL value, triggering the assertion. + +Stable code is not expected to abort, so return an error instead. + +This is easily reproducible when libnfs is not installed: + + ./configure + [...] + module support no + Block whitelist (rw) + Block whitelist (ro) + libiscsi support yes + libnfs support no + [...] + +Start QEMU: + + $ qemu-system-x86_64 -S -qmp unix:/tmp/qemu.qmp,server,nowait + +Send the 'blockdev-create' with the 'nfs' driver: + + $ ( cat << 'EOF' + {'execute': 'qmp_capabilities'} + {'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'} + EOF + ) | socat STDIO UNIX:/tmp/qemu.qmp + {"QMP": {"version": {"qemu": {"micro": 50, "minor": 1, "major": 4}, "package": "v4.1.0-733-g89ea03a7dc"}, "capabilities": ["oob"]}} + {"return": {}} + +QEMU crashes: + + $ gdb qemu-system-x86_64 core + Program received signal SIGSEGV, Segmentation fault. 
+ (gdb) bt + #0 0x00007ffff510957f in raise () at /lib64/libc.so.6 + #1 0x00007ffff50f3895 in abort () at /lib64/libc.so.6 + #2 0x00007ffff50f3769 in _nl_load_domain.cold.0 () at /lib64/libc.so.6 + #3 0x00007ffff5101a26 in .annobin_assert.c_end () at /lib64/libc.so.6 + #4 0x0000555555d7e1f1 in qmp_blockdev_create (job_id=0x555556baee40 "x", options=0x555557666610, errp=0x7fffffffc770) at block/create.c:69 + #5 0x0000555555c96b52 in qmp_marshal_blockdev_create (args=0x7fffdc003830, ret=0x7fffffffc7f8, errp=0x7fffffffc7f0) at qapi/qapi-commands-block-core.c:1314 + #6 0x0000555555deb0a0 in do_qmp_dispatch (cmds=0x55555645de70 , request=0x7fffdc005c70, allow_oob=false, errp=0x7fffffffc898) at qapi/qmp-dispatch.c:131 + #7 0x0000555555deb2a1 in qmp_dispatch (cmds=0x55555645de70 , request=0x7fffdc005c70, allow_oob=false) at qapi/qmp-dispatch.c:174 + +With this patch applied, QEMU returns a QMP error: + + {'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'} + {"id": "x", "error": {"class": "GenericError", "desc": "Block driver 'nfs' not found or not supported"}} + +Cc: qemu-stable@nongnu.org +Reported-by: Xu Tian +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Eric Blake +Reviewed-by: John Snow +Signed-off-by: Kevin Wolf +(cherry picked from commit d90d5cae2b10efc0e8d0b3cc91ff16201853d3ba) +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Miroslav Rezanina +--- + block/create.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/create.c b/block/create.c +index 9534121..de5e97b 100644 +--- a/block/create.c ++++ b/block/create.c +@@ -63,9 +63,13 @@ void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options, + const char *fmt = BlockdevDriver_str(options->driver); + BlockDriver *drv = bdrv_find_format(fmt); + ++ if (!drv) { ++ error_setg(errp, "Block driver '%s' not found or not supported", 
fmt); ++ return; ++ } ++ + /* If the driver is in the schema, we know that it exists. But it may not + * be whitelisted. */ +- assert(drv); + if (bdrv_uses_whitelist() && !bdrv_is_whitelisted(drv, false)) { + error_setg(errp, "Driver is not whitelisted"); + return; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-ccid-Fix-dwProtocols-advertisement-of-T-0.patch b/SOURCES/kvm-ccid-Fix-dwProtocols-advertisement-of-T-0.patch new file mode 100644 index 0000000..b47f08b --- /dev/null +++ b/SOURCES/kvm-ccid-Fix-dwProtocols-advertisement-of-T-0.patch @@ -0,0 +1,66 @@ +From eb91002f4fb6ed6ae36034b178c5480caa5dccae Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 25 Jun 2019 13:30:12 +0200 +Subject: [PATCH 19/23] ccid: Fix dwProtocols advertisement of T=0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Maxim Levitsky +Message-id: <20190625133012.8221-2-mlevitsk@redhat.com> +Patchwork-id: 88926 +O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 1/1] ccid: Fix dwProtocols advertisement of T=0 +Bugzilla: 1721522 +RH-Acked-by: John Snow +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Markus Armbruster + +From: Jason Andryuk + +Commit d7d218ef02d87c637d20d64da8f575d434ff6f78 attempted to change +dwProtocols to only advertise support for T=0 and not T=1. The change +was incorrect as it changed 0x00000003 to 0x00010000. + +lsusb -v in a linux guest shows: +"dwProtocols 65536 (Invalid values detected)", though the +smart card could still be accessed. Windows 7 does not detect inserted +smart cards and logs the following Error in the Event Logs: + + Source: Smart Card Service + Event ID: 610 + Smart Card Reader 'QEMU QEMU USB CCID 0' rejected IOCTL SET_PROTOCOL: + Incorrect function. If this error persists, your smart card or reader + may not be functioning correctly + + Command Header: 03 00 00 00 + +Setting to 0x00000001 fixes the Windows issue. 
+ +Signed-off-by: Jason Andryuk +Message-id: 20180420183219.20722-1-jandryuk@gmail.com +Cc: qemu-stable@nongnu.org +Signed-off-by: Gerd Hoffmann +(cherry picked from commit 0ee86bb6c5beb6498488850104f7557c376d0bef) +Signed-off-by: Miroslav Rezanina +--- + hw/usb/dev-smartcard-reader.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/usb/dev-smartcard-reader.c b/hw/usb/dev-smartcard-reader.c +index 214d3e9..f745192 100644 +--- a/hw/usb/dev-smartcard-reader.c ++++ b/hw/usb/dev-smartcard-reader.c +@@ -329,8 +329,8 @@ static const uint8_t qemu_ccid_descriptor[] = { + */ + 0x07, /* u8 bVoltageSupport; 01h - 5.0v, 02h - 3.0, 03 - 1.8 */ + +- 0x00, 0x00, /* u32 dwProtocols; RRRR PPPP. RRRR = 0000h.*/ +- 0x01, 0x00, /* PPPP: 0001h = Protocol T=0, 0002h = Protocol T=1 */ ++ 0x01, 0x00, /* u32 dwProtocols; RRRR PPPP. RRRR = 0000h.*/ ++ 0x00, 0x00, /* PPPP: 0001h = Protocol T=0, 0002h = Protocol T=1 */ + /* u32 dwDefaultClock; in kHZ (0x0fa0 is 4 MHz) */ + 0xa0, 0x0f, 0x00, 0x00, + /* u32 dwMaximumClock; */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-file-posix-Avoid-aio_worker-for-QEMU_AIO_WRITE_ZEROE.patch b/SOURCES/kvm-file-posix-Avoid-aio_worker-for-QEMU_AIO_WRITE_ZEROE.patch new file mode 100644 index 0000000..48fed01 --- /dev/null +++ b/SOURCES/kvm-file-posix-Avoid-aio_worker-for-QEMU_AIO_WRITE_ZEROE.patch @@ -0,0 +1,147 @@ +From 207dc8620ffb1a1bdbfaddba5208a82f93058b2c Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 5 Jun 2019 13:57:02 +0200 +Subject: [PATCH 14/23] file-posix: Avoid aio_worker() for + QEMU_AIO_WRITE_ZEROES + +RH-Author: Maxim Levitsky +Message-id: <20190605135705.24526-7-mlevitsk@redhat.com> +Patchwork-id: 88561 +O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 6/9] file-posix: Avoid aio_worker() for QEMU_AIO_WRITE_ZEROES +Bugzilla: 1648622 +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: John Snow + +From: Kevin Wolf + +aio_worker() doesn't add anything interesting, it's only a useless +indirection. 
Call the handler function directly instead. + +As we know that this handler function is only called from coroutine +context and the coroutine stays around until the worker thread finishes, +we can keep RawPosixAIOData on the stack. + +Signed-off-by: Kevin Wolf + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1648622 + +Signed-off-by: Maxim Levitsky +(Cherry picked from 7154d8ae66c75c97b08c8f1c80dd6f46f0dbffca) + +Signed-off-by: Miroslav Rezanina +--- + block/file-posix.c | 53 ++++++++++++++++++++++++++++++++++------------------- + 1 file changed, 34 insertions(+), 19 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 74da336..90c719f 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1465,8 +1465,9 @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb) + return ret; + } + +-static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb) ++static int handle_aiocb_write_zeroes(void *opaque) + { ++ RawPosixAIOData *aiocb = opaque; + #if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS) + BDRVRawState *s = aiocb->bs->opaque; + #endif +@@ -1530,8 +1531,9 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb) + return -ENOTSUP; + } + +-static ssize_t handle_aiocb_write_zeroes_unmap(RawPosixAIOData *aiocb) ++static int handle_aiocb_write_zeroes_unmap(void *opaque) + { ++ RawPosixAIOData *aiocb = opaque; + BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque; + int ret; + +@@ -1797,11 +1799,7 @@ static int aio_worker(void *arg) + ret = handle_aiocb_discard(aiocb); + break; + case QEMU_AIO_WRITE_ZEROES: +- ret = handle_aiocb_write_zeroes(aiocb); +- break; + case QEMU_AIO_WRITE_ZEROES | QEMU_AIO_DISCARD: +- ret = handle_aiocb_write_zeroes_unmap(aiocb); +- break; + case QEMU_AIO_COPY_RANGE: + ret = handle_aiocb_copy_range(aiocb); + break; +@@ -2518,18 +2516,41 @@ static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs, + cb, opaque, QEMU_AIO_DISCARD); + } + +-static int coroutine_fn 
raw_co_pwrite_zeroes( +- BlockDriverState *bs, int64_t offset, +- int bytes, BdrvRequestFlags flags) ++static int coroutine_fn ++raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, ++ BdrvRequestFlags flags, bool blkdev) + { + BDRVRawState *s = bs->opaque; +- int operation = QEMU_AIO_WRITE_ZEROES; ++ RawPosixAIOData acb; ++ ThreadPoolFunc *handler; ++ ++ acb = (RawPosixAIOData) { ++ .bs = bs, ++ .aio_fildes = s->fd, ++ .aio_type = QEMU_AIO_WRITE_ZEROES, ++ .aio_offset = offset, ++ .aio_nbytes = bytes, ++ }; ++ ++ if (blkdev) { ++ acb.aio_type |= QEMU_AIO_BLKDEV; ++ } + + if (flags & BDRV_REQ_MAY_UNMAP) { +- operation |= QEMU_AIO_DISCARD; ++ acb.aio_type |= QEMU_AIO_DISCARD; ++ handler = handle_aiocb_write_zeroes_unmap; ++ } else { ++ handler = handle_aiocb_write_zeroes; + } + +- return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation); ++ return raw_thread_pool_submit(bs, handler, &acb); ++} ++ ++static int coroutine_fn raw_co_pwrite_zeroes( ++ BlockDriverState *bs, int64_t offset, ++ int bytes, BdrvRequestFlags flags) ++{ ++ return raw_do_pwrite_zeroes(bs, offset, bytes, flags, false); + } + + static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +@@ -3093,8 +3114,6 @@ static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs, + static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, BdrvRequestFlags flags) + { +- BDRVRawState *s = bs->opaque; +- int operation = QEMU_AIO_WRITE_ZEROES | QEMU_AIO_BLKDEV; + int rc; + + rc = fd_open(bs); +@@ -3102,11 +3121,7 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, + return rc; + } + +- if (flags & BDRV_REQ_MAY_UNMAP) { +- operation |= QEMU_AIO_DISCARD; +- } +- +- return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation); ++ return raw_do_pwrite_zeroes(bs, offset, bytes, flags, true); + } + + static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts, +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-file-posix-Factor-out-raw_thread_pool_submit.patch b/SOURCES/kvm-file-posix-Factor-out-raw_thread_pool_submit.patch new file mode 100644 index 0000000..2c2c981 --- /dev/null +++ b/SOURCES/kvm-file-posix-Factor-out-raw_thread_pool_submit.patch @@ -0,0 +1,90 @@ +From aa184b19787e6f9701c6b273394d502e03d8b0c1 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 5 Jun 2019 13:57:01 +0200 +Subject: [PATCH 13/23] file-posix: Factor out raw_thread_pool_submit() + +RH-Author: Maxim Levitsky +Message-id: <20190605135705.24526-6-mlevitsk@redhat.com> +Patchwork-id: 88556 +O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 5/9] file-posix: Factor out raw_thread_pool_submit() +Bugzilla: 1648622 +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: John Snow + +From: Kevin Wolf + +Getting the thread pool of the AioContext of a block node and scheduling +some work in it is an operation that is already done twice, and we'll +get more instances. Factor it out into a separate function. 
+ +Signed-off-by: Kevin Wolf + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1648622 + +Signed-off-by: Maxim Levitsky +(Cherry picked from 5d5de250056b0972cde2e88133db702960a32b72 with some conflicts) + +Signed-off-by: Miroslav Rezanina +--- + block/file-posix.c | 17 ++++++++++------- + 1 file changed, 10 insertions(+), 7 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 5b93d06..74da336 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1818,13 +1818,20 @@ static int aio_worker(void *arg) + return ret; + } + ++static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs, ++ ThreadPoolFunc func, void *arg) ++{ ++ /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */ ++ ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); ++ return thread_pool_submit_co(pool, func, arg); ++} ++ + static int paio_submit_co_full(BlockDriverState *bs, int fd, + int64_t offset, int fd2, int64_t offset2, + QEMUIOVector *qiov, + int bytes, int type) + { + RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); +- ThreadPool *pool; + + acb->bs = bs; + acb->aio_type = type; +@@ -1842,8 +1849,7 @@ static int paio_submit_co_full(BlockDriverState *bs, int fd, + } + + trace_paio_submit_co(offset, bytes, type); +- pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); +- return thread_pool_submit_co(pool, aio_worker, acb); ++ return raw_thread_pool_submit(bs, aio_worker, acb); + } + + static inline int paio_submit_co(BlockDriverState *bs, int fd, +@@ -1976,7 +1982,6 @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, + PreallocMode prealloc, Error **errp) + { + RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); +- ThreadPool *pool; + + *acb = (RawPosixAIOData) { + .bs = bs, +@@ -1987,9 +1992,7 @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, + .errp = errp, + }; + +- /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */ +- pool = 
aio_get_thread_pool(bdrv_get_aio_context(bs)); +- return thread_pool_submit_co(pool, aio_worker, acb); ++ return raw_thread_pool_submit(bs, aio_worker, acb); + } + + static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-file-posix-Fix-write_zeroes-with-unmap-on-block-devi.patch b/SOURCES/kvm-file-posix-Fix-write_zeroes-with-unmap-on-block-devi.patch new file mode 100644 index 0000000..450eee6 --- /dev/null +++ b/SOURCES/kvm-file-posix-Fix-write_zeroes-with-unmap-on-block-devi.patch @@ -0,0 +1,153 @@ +From 4704bbaba38bfeb1710b109f0b4b32a839b24734 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 5 Jun 2019 13:57:00 +0200 +Subject: [PATCH 12/23] file-posix: Fix write_zeroes with unmap on block + devices + +RH-Author: Maxim Levitsky +Message-id: <20190605135705.24526-5-mlevitsk@redhat.com> +Patchwork-id: 88562 +O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 4/9] file-posix: Fix write_zeroes with unmap on block devices +Bugzilla: 1648622 +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: John Snow + +From: Kevin Wolf + +The BLKDISCARD ioctl doesn't guarantee that the discarded blocks read as +all-zero afterwards, so don't try to abuse it for zero writing. We try +to only use this if BLKDISCARDZEROES tells us that it is safe, but this +is unreliable on older kernels and a constant 0 in newer kernels. In +other words, this code path is never actually used with newer kernels, +so we don't even try to unmap while writing zeros. + +This patch removes the abuse of discard for writing zeroes from +file-posix and instead adds a new function that uses interfaces that are +actually meant to deallocate and zero out at the same time. Only if +those fail, it falls back to zeroing out without unmap. We never fall +back to a discard operation any more that may or may not result in +zeros. 
+ +Signed-off-by: Kevin Wolf + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1648622 + +Signed-off-by: Maxim Levitsky +(Cherry picked from 34fa110e424e9a6a9b7e0274c3d4bfee766eb7ed) + +Signed-off-by: Miroslav Rezanina +--- + block/file-posix.c | 59 ++++++++++++++++++++++++++++++++++++++++-------------- + 1 file changed, 44 insertions(+), 15 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 518f16b..5b93d06 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -632,7 +632,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + } + #endif + +- bs->supported_zero_flags = s->discard_zeroes ? BDRV_REQ_MAY_UNMAP : 0; ++ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; + ret = 0; + fail: + if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { +@@ -1530,6 +1530,35 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb) + return -ENOTSUP; + } + ++static ssize_t handle_aiocb_write_zeroes_unmap(RawPosixAIOData *aiocb) ++{ ++ BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque; ++ int ret; ++ ++ /* First try to write zeros and unmap at the same time */ ++ ++#ifdef CONFIG_FALLOCATE_PUNCH_HOLE ++ ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, ++ aiocb->aio_offset, aiocb->aio_nbytes); ++ if (ret != -ENOTSUP) { ++ return ret; ++ } ++#endif ++ ++#ifdef CONFIG_XFS ++ if (s->is_xfs) { ++ /* xfs_discard() guarantees that the discarded area reads as all-zero ++ * afterwards, so we can use it here. 
*/ ++ return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes); ++ } ++#endif ++ ++ /* If we couldn't manage to unmap while guaranteed that the area reads as ++ * all-zero afterwards, just write zeroes without unmapping */ ++ ret = handle_aiocb_write_zeroes(aiocb); ++ return ret; ++} ++ + #ifndef HAVE_COPY_FILE_RANGE + static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd, + off_t *out_off, size_t len, unsigned int flags) +@@ -1770,6 +1799,9 @@ static int aio_worker(void *arg) + case QEMU_AIO_WRITE_ZEROES: + ret = handle_aiocb_write_zeroes(aiocb); + break; ++ case QEMU_AIO_WRITE_ZEROES | QEMU_AIO_DISCARD: ++ ret = handle_aiocb_write_zeroes_unmap(aiocb); ++ break; + case QEMU_AIO_COPY_RANGE: + ret = handle_aiocb_copy_range(aiocb); + break; +@@ -2488,15 +2520,13 @@ static int coroutine_fn raw_co_pwrite_zeroes( + int bytes, BdrvRequestFlags flags) + { + BDRVRawState *s = bs->opaque; ++ int operation = QEMU_AIO_WRITE_ZEROES; + +- if (!(flags & BDRV_REQ_MAY_UNMAP)) { +- return paio_submit_co(bs, s->fd, offset, NULL, bytes, +- QEMU_AIO_WRITE_ZEROES); +- } else if (s->discard_zeroes) { +- return paio_submit_co(bs, s->fd, offset, NULL, bytes, +- QEMU_AIO_DISCARD); ++ if (flags & BDRV_REQ_MAY_UNMAP) { ++ operation |= QEMU_AIO_DISCARD; + } +- return -ENOTSUP; ++ ++ return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation); + } + + static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +@@ -3061,20 +3091,19 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, BdrvRequestFlags flags) + { + BDRVRawState *s = bs->opaque; ++ int operation = QEMU_AIO_WRITE_ZEROES | QEMU_AIO_BLKDEV; + int rc; + + rc = fd_open(bs); + if (rc < 0) { + return rc; + } +- if (!(flags & BDRV_REQ_MAY_UNMAP)) { +- return paio_submit_co(bs, s->fd, offset, NULL, bytes, +- QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV); +- } else if (s->discard_zeroes) { +- return paio_submit_co(bs, s->fd, offset, NULL, bytes, +- 
QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); ++ ++ if (flags & BDRV_REQ_MAY_UNMAP) { ++ operation |= QEMU_AIO_DISCARD; + } +- return -ENOTSUP; ++ ++ return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation); + } + + static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-file-posix-Handle-undetectable-alignment.patch b/SOURCES/kvm-file-posix-Handle-undetectable-alignment.patch new file mode 100644 index 0000000..f860d80 --- /dev/null +++ b/SOURCES/kvm-file-posix-Handle-undetectable-alignment.patch @@ -0,0 +1,129 @@ +From 29c28722af50333db06c4c2497852896cf15ca23 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Fri, 23 Aug 2019 15:11:10 +0200 +Subject: [PATCH 3/4] file-posix: Handle undetectable alignment + +RH-Author: Max Reitz +Message-id: <20190823151110.17322-2-mreitz@redhat.com> +Patchwork-id: 90139 +O-Subject: [RHEL-7.7.z qemu-kvm-rhev PATCH 1/1] file-posix: Handle undetectable alignment +Bugzilla: 1743365 +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella + +From: Nir Soffer + +In some cases buf_align or request_alignment cannot be detected: + +1. With Gluster, buf_align cannot be detected since the actual I/O is + done on Gluster server, and qemu buffer alignment does not matter. + Since we don't have alignment requirement, buf_align=1 is the best + value. + +2. With local XFS filesystem, buf_align cannot be detected if reading + from unallocated area. In this we must align the buffer, but we don't + know what is the correct size. Using the wrong alignment results in + I/O error. + +3. With Gluster backed by XFS, request_alignment cannot be detected if + reading from unallocated area. In this case we need to use the + correct alignment, and failing to do so results in I/O errors. + +4. With NFS, the server does not use direct I/O, so both buf_align cannot + be detected. 
In this case we don't need any alignment so we can use + buf_align=1 and request_alignment=1. + +These cases seem to work when storage sector size is 512 bytes, because +the current code starts checking align=512. If the check succeeds +because alignment cannot be detected we use 512. But this does not work +for storage with 4k sector size. + +To determine if we can detect the alignment, we probe first with +align=1. If probing succeeds, maybe there are no alignment requirements +(cases 1, 4) or we are probing unallocated area (cases 2, 3). Since we +don't have any way to tell, we treat this as undetectable alignment. If +probing with align=1 fails with EINVAL, but probing with one of the +expected alignments succeeds, we know that we found a working alignment. + +Practically the alignment requirements are the same for buffer +alignment, buffer length, and offset in file. So in case we cannot +detect buf_align, we can use request alignment. If we cannot detect +request alignment, we can fall back to a safe value. To use this logic, +we probe first request alignment instead of buf_align. + +Here is a table showing the behaviour with current code (the value in +parentheses is the optimal value). + +Case Sector buf_align (opt) request_alignment (opt) result + +Signed-off-by: Miroslav Rezanina +--- + block/file-posix.c | 36 +++++++++++++++++++++++++----------- + 1 file changed, 25 insertions(+), 11 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index d1926b3..548424d 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -325,6 +325,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) + BDRVRawState *s = bs->opaque; + char *buf; + size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); ++ size_t alignments[] = {1, 512, 1024, 2048, 4096}; + + /* For SCSI generic devices the alignment is not really used. + With buffered I/O, we don't have any restrictions. 
*/ +@@ -351,25 +352,38 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) + } + #endif + +- /* If we could not get the sizes so far, we can only guess them */ +- if (!s->buf_align) { ++ /* ++ * If we could not get the sizes so far, we can only guess them. First try ++ * to detect request alignment, since it is more likely to succeed. Then ++ * try to detect buf_align, which cannot be detected in some cases (e.g. ++ * Gluster). If buf_align cannot be detected, we fallback to the value of ++ * request_alignment. ++ */ ++ ++ if (!bs->bl.request_alignment) { ++ int i; + size_t align; +- buf = qemu_memalign(max_align, 2 * max_align); +- for (align = 512; align <= max_align; align <<= 1) { +- if (raw_is_io_aligned(fd, buf + align, max_align)) { +- s->buf_align = align; ++ buf = qemu_memalign(max_align, max_align); ++ for (i = 0; i < ARRAY_SIZE(alignments); i++) { ++ align = alignments[i]; ++ if (raw_is_io_aligned(fd, buf, align)) { ++ /* Fallback to safe value. */ ++ bs->bl.request_alignment = (align != 1) ? align : max_align; + break; + } + } + qemu_vfree(buf); + } + +- if (!bs->bl.request_alignment) { ++ if (!s->buf_align) { ++ int i; + size_t align; +- buf = qemu_memalign(s->buf_align, max_align); +- for (align = 512; align <= max_align; align <<= 1) { +- if (raw_is_io_aligned(fd, buf, align)) { +- bs->bl.request_alignment = align; ++ buf = qemu_memalign(max_align, 2 * max_align); ++ for (i = 0; i < ARRAY_SIZE(alignments); i++) { ++ align = alignments[i]; ++ if (raw_is_io_aligned(fd, buf + align, max_align)) { ++ /* Fallback to request_aligment. */ ++ s->buf_align = (align != 1) ? 
align : bs->bl.request_alignment; + break; + } + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-file-posix-Support-BDRV_REQ_NO_FALLBACK-for-zero-wri.patch b/SOURCES/kvm-file-posix-Support-BDRV_REQ_NO_FALLBACK-for-zero-wri.patch new file mode 100644 index 0000000..17cb59d --- /dev/null +++ b/SOURCES/kvm-file-posix-Support-BDRV_REQ_NO_FALLBACK-for-zero-wri.patch @@ -0,0 +1,103 @@ +From 9a02c0bcafdbf681e76f816ad3f60dfb7dea13fb Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 5 Jun 2019 13:57:03 +0200 +Subject: [PATCH 15/23] file-posix: Support BDRV_REQ_NO_FALLBACK for zero + writes + +RH-Author: Maxim Levitsky +Message-id: <20190605135705.24526-8-mlevitsk@redhat.com> +Patchwork-id: 88560 +O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 7/9] file-posix: Support BDRV_REQ_NO_FALLBACK for zero writes +Bugzilla: 1648622 +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: John Snow + +From: Kevin Wolf + +We know that the kernel implements a slow fallback code path for +BLKZEROOUT, so if BDRV_REQ_NO_FALLBACK is given, we shouldn't call it. +The other operations we call in the context of .bdrv_co_pwrite_zeroes +should usually be quick, so no modification should be needed for them. +If we ever notice that there are additional problematic cases, we can +still make these conditional as well. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Eric Blake + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1648622 + +Signed-off-by: Maxim Levitsky +(Cherry picked from 738301e11758171defaa5a5237d584f8226af89f) + +Signed-off-by: Miroslav Rezanina +--- + block/file-posix.c | 24 ++++++++++++++++-------- + include/block/raw-aio.h | 1 + + 2 files changed, 17 insertions(+), 8 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 90c719f..d1926b3 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -632,7 +632,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + } + #endif + +- bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; ++ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; + ret = 0; + fail: + if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { +@@ -1449,14 +1449,19 @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb) + } + + #ifdef BLKZEROOUT +- do { +- uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; +- if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) { +- return 0; +- } +- } while (errno == EINTR); ++ /* The BLKZEROOUT implementation in the kernel doesn't set ++ * BLKDEV_ZERO_NOFALLBACK, so we can't call this if we have to avoid slow ++ * fallbacks. 
*/ ++ if (!(aiocb->aio_type & QEMU_AIO_NO_FALLBACK)) { ++ do { ++ uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; ++ if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) { ++ return 0; ++ } ++ } while (errno == EINTR); + +- ret = translate_err(-errno); ++ ret = translate_err(-errno); ++ } + #endif + + if (ret == -ENOTSUP) { +@@ -2535,6 +2540,9 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, + if (blkdev) { + acb.aio_type |= QEMU_AIO_BLKDEV; + } ++ if (flags & BDRV_REQ_NO_FALLBACK) { ++ acb.aio_type |= QEMU_AIO_NO_FALLBACK; ++ } + + if (flags & BDRV_REQ_MAY_UNMAP) { + acb.aio_type |= QEMU_AIO_DISCARD; +diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h +index 2ffcd9d..5a926a3 100644 +--- a/include/block/raw-aio.h ++++ b/include/block/raw-aio.h +@@ -40,6 +40,7 @@ + /* AIO flags */ + #define QEMU_AIO_MISALIGNED 0x1000 + #define QEMU_AIO_BLKDEV 0x2000 ++#define QEMU_AIO_NO_FALLBACK 0x4000 + + + /* linux-aio.c - Linux native implementation */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-Add-CPUID-bit-and-feature-words-for-IA32_ARCH_C.patch b/SOURCES/kvm-i386-Add-CPUID-bit-and-feature-words-for-IA32_ARCH_C.patch new file mode 100644 index 0000000..1c02644 --- /dev/null +++ b/SOURCES/kvm-i386-Add-CPUID-bit-and-feature-words-for-IA32_ARCH_C.patch @@ -0,0 +1,68 @@ +From f326041af5bd8ebebee83dbc296ab853955cd806 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Tue, 4 Jun 2019 21:47:21 +0200 +Subject: [PATCH 02/23] i386: Add CPUID bit and feature words for + IA32_ARCH_CAPABILITIES MSR + +RH-Author: plai@redhat.com +Message-id: <1559684847-10889-3-git-send-email-plai@redhat.com> +Patchwork-id: 88533 +O-Subject: [RHEL7.7 qemu-kvm-rhev PATCH v4 2/8] i386: Add CPUID bit and feature words for IA32_ARCH_CAPABILITIES MSR +Bugzilla: 1709972 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Miroslav Rezanina + +From: Robert Hoo + +Support of IA32_PRED_CMD MSR already be enumerated by same CPUID bit as 
+SPEC_CTRL. + +At present, mark CPUID_7_0_EDX_ARCH_CAPABILITIES unmigratable, per Paolo's +comment. + +Signed-off-by: Robert Hoo +Message-Id: <1530781798-183214-3-git-send-email-robert.hu@linux.intel.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 3fc7c73139d2d38ae80c3b0bc963b1ac1555924c) +Signed-off-by: Paul Lai +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 3 ++- + target/i386/cpu.h | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 4558b1a..4c7364b 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1008,12 +1008,13 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, "spec-ctrl", "stibp", +- NULL, NULL, NULL, "ssbd", ++ NULL, "arch-capabilities", NULL, "ssbd", + }, + .cpuid_eax = 7, + .cpuid_needs_ecx = true, .cpuid_ecx = 0, + .cpuid_reg = R_EDX, + .tcg_features = TCG_7_0_EDX_FEATURES, ++ .unmigratable_flags = CPUID_7_0_EDX_ARCH_CAPABILITIES, + }, + [FEAT_8000_0007_EDX] = { + .feat_names = { +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index aabb6c8..eb39724 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -689,6 +689,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ + #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */ + #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Speculation Control */ ++#define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) /*Arch Capabilities*/ + #define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) /* Speculative Store Bypass Disable */ + + #define KVM_HINTS_DEDICATED (1U << 0) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-Add-new-MSR-indices-for-IA32_PRED_CMD-and-IA32_.patch b/SOURCES/kvm-i386-Add-new-MSR-indices-for-IA32_PRED_CMD-and-IA32_.patch new file mode 100644 index 0000000..adb01fe --- /dev/null +++ 
b/SOURCES/kvm-i386-Add-new-MSR-indices-for-IA32_PRED_CMD-and-IA32_.patch @@ -0,0 +1,50 @@ +From 1261bc0eac292f500640c7ca96e381a044650717 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Tue, 4 Jun 2019 21:47:20 +0200 +Subject: [PATCH 01/23] i386: Add new MSR indices for IA32_PRED_CMD and + IA32_ARCH_CAPABILITIES + +RH-Author: plai@redhat.com +Message-id: <1559684847-10889-2-git-send-email-plai@redhat.com> +Patchwork-id: 88528 +O-Subject: [RHEL7.7 qemu-kvm-rhev PATCH v4 1/8] i386: Add new MSR indices for IA32_PRED_CMD and IA32_ARCH_CAPABILITIES +Bugzilla: 1709972 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Miroslav Rezanina + +From: Robert Hoo + +IA32_PRED_CMD MSR gives software a way to issue commands that affect the state +of indirect branch predictors. Enumerated by CPUID.(EAX=7H,ECX=0):EDX[26]. +IA32_ARCH_CAPABILITIES MSR enumerates architectural features of RDCL_NO and +IBRS_ALL. Enumerated by CPUID.(EAX=07H, ECX=0):EDX[29]. + +https://software.intel.com/sites/default/files/managed/c5/63/336996-Speculative-Execution-Side-Channel-Mitigations.pdf + +Signed-off-by: Robert Hoo +Message-Id: <1530781798-183214-2-git-send-email-robert.hu@linux.intel.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 8c80c99fcceabd0708a5a83f08577e778c9419f5) +Signed-off-by: Paul Lai +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index ea8c355..aabb6c8 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -352,6 +352,8 @@ typedef enum X86Seg { + #define MSR_TSC_ADJUST 0x0000003b + #define MSR_IA32_SPEC_CTRL 0x48 + #define MSR_VIRT_SSBD 0xc001011f ++#define MSR_IA32_PRED_CMD 0x49 ++#define MSR_IA32_ARCH_CAPABILITIES 0x10a + #define MSR_IA32_TSCDEADLINE 0x6e0 + + #define FEATURE_CONTROL_LOCKED (1<<0) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-Add-new-model-of-Cascadelake-Server.patch 
b/SOURCES/kvm-i386-Add-new-model-of-Cascadelake-Server.patch new file mode 100644 index 0000000..d6fc740 --- /dev/null +++ b/SOURCES/kvm-i386-Add-new-model-of-Cascadelake-Server.patch @@ -0,0 +1,107 @@ +From 00a6c6f0214ad15a68870ad753e656e17a12afe6 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Fri, 4 Oct 2019 19:57:12 +0200 +Subject: [PATCH 2/4] i386: Add new model of Cascadelake-Server + +RH-Author: Eduardo Habkost +Message-id: <20191004195714.10176-2-ehabkost@redhat.com> +Patchwork-id: 90960 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH 1/3] i386: Add new model of Cascadelake-Server +Bugzilla: 1638472 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Igor Mammedov + +From: Tao Xu + +New CPU models mostly inherit features from ancestor Skylake-Server, +while adding new features: AVX512_VNNI, Intel PT. +SSBD support for speculative execution +side channel mitigations. + +Note: + +On Cascadelake, some capabilities (RDCL_NO, IBRS_ALL, RSBA, +SKIP_L1DFL_VMENTRY and SSB_NO) are enumerated by MSR. +These features rely on MSR based feature support patch. +Will be added later after that patch's in. 
+http://lists.nongnu.org/archive/html/qemu-devel/2018-09/msg00074.html + +Signed-off-by: Tao Xu +Message-Id: <20180919031122.28487-2-tao3.xu@intel.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit c7a88b52f62b30c04158eeb07f73e3f72221b6a8) +Signed-off-by: Eduardo Habkost +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 54 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 5d6b45b..2b85193 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2483,6 +2483,60 @@ static X86CPUDefinition builtin_x86_defs[] = { + .model_id = "Intel Xeon Processor (Skylake, IBRS)", + }, + { ++ .name = "Cascadelake-Server", ++ .level = 0xd, ++ .vendor = CPUID_VENDOR_INTEL, ++ .family = 6, ++ .model = 85, ++ .stepping = 5, ++ .features[FEAT_1_EDX] = ++ CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | ++ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | ++ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | ++ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | ++ CPUID_DE | CPUID_FP87, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | ++ CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | ++ CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | ++ CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | ++ CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | ++ CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP | ++ CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | ++ CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | ++ CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | ++ CPUID_7_0_EBX_RTM | 
CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | ++ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | ++ CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | ++ CPUID_7_0_EBX_INTEL_PT, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | ++ CPUID_7_0_ECX_AVX512VNNI, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, ++ /* Missing: XSAVES (not supported by some Linux versions, ++ * including v4.1 to v4.12). ++ * KVM doesn't yet expose any XSAVES state save component, ++ * and the only one defined in Skylake (processor tracing) ++ * probably will block migration anyway. ++ */ ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .xlevel = 0x80000008, ++ .model_id = "Intel Xeon Processor (Cascadelake)", ++ }, ++ { + .name = "Opteron_G1", + .level = 5, + .vendor = CPUID_VENDOR_AMD, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-Disable-OSPKE-on-Cascadelake-Server.patch b/SOURCES/kvm-i386-Disable-OSPKE-on-Cascadelake-Server.patch new file mode 100644 index 0000000..85528dc --- /dev/null +++ b/SOURCES/kvm-i386-Disable-OSPKE-on-Cascadelake-Server.patch @@ -0,0 +1,76 @@ +From a746ca268865ce21cf72710577e1fc3f69ce9506 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Fri, 4 Oct 2019 19:57:13 +0200 +Subject: [PATCH 3/4] i386: Disable OSPKE on Cascadelake-Server + +RH-Author: Eduardo Habkost +Message-id: <20191004195714.10176-3-ehabkost@redhat.com> +Patchwork-id: 90962 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH 2/3] i386: Disable OSPKE on Cascadelake-Server +Bugzilla: 1638472 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Igor Mammedov + +This is a partial cherry pick of upstream commit: + +commit bb4928c7cafe50ab2137a0034e350ef1bfa044d9 +Author: Eduardo Habkost +Date: Tue Mar 19 17:05:15 2019 -0300 + + i386: Disable OSPKE on CPU model definitions + + Currently, the Cascadelake-Server, Icelake-Client, and + Icelake-Server are always generating the following warning: + + qemu-system-x86_64: warning: \ + host doesn't support requested feature: CPUID.07H:ECX [bit 4] + + This happens because OSPKE was never returned by + GET_SUPPORTED_CPUID or x86_cpu_get_supported_feature_word(). + OSPKE is a runtime flag automatically set by the KVM module or by + TCG code, was always cleared by x86_cpu_filter_features(), and + was not supposed to appear on the CPU model table. + + Remove the OSPKE flag from the CPU model table entries, to avoid + the bogus warning and avoid returning invalid feature data on + query-cpu-* QMP commands. As OSPKE was always cleared by + x86_cpu_filter_features(), this won't have any guest-visible + impact. + + Include a test case that should detect the problem if we introduce + a similar bug again. + + Fixes: c7a88b52f62b ("i386: Add new model of Cascadelake-Server") + Fixes: 8a11c62da914 ("i386: Add new CPU model Icelake-{Server,Client}") + Cc: Tao Xu + Cc: Robert Hoo + Signed-off-by: Eduardo Habkost + Message-Id: <20190319200515.14999-1-ehabkost@redhat.com> + Signed-off-by: Eduardo Habkost + +It includes only the Cascadelake-Server change, because Icelake* +is not present in the RHEL7 tree. 
+ +Signed-off-by: Eduardo Habkost +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 2b85193..b377564 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2518,7 +2518,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | + CPUID_7_0_EBX_INTEL_PT, + .features[FEAT_7_0_ECX] = +- CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | ++ CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512VNNI, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-Make-arch_capabilities-migratable.patch b/SOURCES/kvm-i386-Make-arch_capabilities-migratable.patch new file mode 100644 index 0000000..570d4b2 --- /dev/null +++ b/SOURCES/kvm-i386-Make-arch_capabilities-migratable.patch @@ -0,0 +1,45 @@ +From 94a85e1b51fb7e2f646938330c4a4ebf4ffeb281 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Tue, 4 Jun 2019 21:47:27 +0200 +Subject: [PATCH 08/23] i386: Make arch_capabilities migratable + +RH-Author: plai@redhat.com +Message-id: <1559684847-10889-9-git-send-email-plai@redhat.com> +Patchwork-id: 88530 +O-Subject: [RHEL7.7 qemu-kvm-rhev PATCH v4 8/8] i386: Make arch_capabilities migratable +Bugzilla: 1709972 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Miroslav Rezanina + +From: Eduardo Habkost + +Now that kvm_arch_get_supported_cpuid() will only return +arch_capabilities if QEMU is able to initialize the MSR properly, +we know that the feature is safely migratable. 
+ +Signed-off-by: Eduardo Habkost +Message-Id: <20190125220606.4864-3-ehabkost@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 014018e19b3c54dd1bf5072bc912ceffea40abe8) +Signed-off-by: Paul Lai +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 478c5a4..1f2f286 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1053,7 +1053,6 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .reg = R_EDX, + }, + .tcg_features = TCG_7_0_EDX_FEATURES, +- .unmigratable_flags = CPUID_7_0_EDX_ARCH_CAPABILITIES, + }, + [FEAT_8000_0007_EDX] = { + .type = CPUID_FEATURE_WORD, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-halt-poll-control-MSR-support.patch b/SOURCES/kvm-i386-halt-poll-control-MSR-support.patch new file mode 100644 index 0000000..34230d3 --- /dev/null +++ b/SOURCES/kvm-i386-halt-poll-control-MSR-support.patch @@ -0,0 +1,187 @@ +From ebfeb40743756dfa7087b43ce4e9afe3e85f9710 Mon Sep 17 00:00:00 2001 +From: Marcelo Tosatti +Date: Wed, 28 Aug 2019 01:00:55 +0200 +Subject: [PATCH 1/4] kvm: i386: halt poll control MSR support + +RH-Author: Marcelo Tosatti +Message-id: <20190828010051.GA30252@amt.cnet> +Patchwork-id: 90171 +O-Subject: [RHEL-7.8 qemu-kvm PATCH] kvm: i386: halt poll control MSR support +Bugzilla: 1734502 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Igor Mammedov +RH-Acked-by: Miroslav Rezanina + +This is the qemu part of the haltpoll driver: allows migrating +the MSR which the guest uses to disable host side polling. + +Add support for halt poll control MSR: save/restore, migration +and new feature name. + +The purpose of this MSR is to allow the guest to disable +host halt poll. + +(cherry picked from commit d645e1328726b38b3c79525eb57842ce29c1df7c) +Signed-off-by: Marcelo Tosatti +Message-Id: <20190603230408.GA7938@amt.cnet> +[Do not enable by default, as pointed out by Mark Kanda. 
- Paolo] +Signed-off-by: Paolo Bonzini + +Signed-off-by: Miroslav Rezanina +--- + linux-headers/asm-x86/kvm_para.h | 2 ++ + target/i386/cpu.c | 4 +++- + target/i386/cpu.h | 1 + + target/i386/kvm.c | 15 +++++++++++++++ + target/i386/machine.c | 20 ++++++++++++++++++++ + 5 files changed, 41 insertions(+), 1 deletion(-) + +diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h +index 4c58184..38ab00d 100644 +--- a/linux-headers/asm-x86/kvm_para.h ++++ b/linux-headers/asm-x86/kvm_para.h +@@ -27,6 +27,7 @@ + #define KVM_FEATURE_PV_UNHALT 7 + #define KVM_FEATURE_PV_TLB_FLUSH 9 + #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 ++#define KVM_FEATURE_POLL_CONTROL 12 + + /* The last 8 bits are used to indicate how to interpret the flags field + * in pvclock structure. If no bits are set, all flags are ignored. +@@ -43,6 +44,7 @@ + #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 + #define MSR_KVM_STEAL_TIME 0x4b564d03 + #define MSR_KVM_PV_EOI_EN 0x4b564d04 ++#define MSR_KVM_POLL_CONTROL 0x4b564d05 + + struct kvm_steal_time { + __u64 steal; +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 1f2f286..928e53c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -903,7 +903,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "kvmclock", "kvm-nopiodelay", "kvm-mmu", "kvmclock", + "kvm-asyncpf", "kvm-steal-time", "kvm-pv-eoi", "kvm-pv-unhalt", + NULL, "kvm-pv-tlb-flush", NULL, NULL, +- NULL, NULL, NULL, NULL, ++ "kvm-poll-control", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "kvmclock-stable-bit", NULL, NULL, NULL, +@@ -5342,6 +5342,8 @@ static void x86_cpu_initfn(Object *obj) + object_property_add_alias(obj, "kvm_steal_time", obj, "kvm-steal-time", &error_abort); + object_property_add_alias(obj, "kvm_pv_eoi", obj, "kvm-pv-eoi", &error_abort); + object_property_add_alias(obj, "kvm_pv_unhalt", obj, "kvm-pv-unhalt", &error_abort); ++ object_property_add_alias(obj, "kvm_poll_control", obj, "kvm-poll-control", ++ 
&error_abort); + object_property_add_alias(obj, "svm_lock", obj, "svm-lock", &error_abort); + object_property_add_alias(obj, "nrip_save", obj, "nrip-save", &error_abort); + object_property_add_alias(obj, "tsc_scale", obj, "tsc-scale", &error_abort); +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 8ab313e..095e695 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1233,6 +1233,7 @@ typedef struct CPUX86State { + uint64_t steal_time_msr; + uint64_t async_pf_en_msr; + uint64_t pv_eoi_en_msr; ++ uint64_t poll_control_msr; + + /* Partition-wide HV MSRs, will be updated only on the first vcpu */ + uint64_t msr_hv_hypercall; +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 7afad93..72901e1 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -1199,6 +1199,9 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) + env->msr_hv_synic_sint[i] = HV_SINT_MASKED; + } + } ++ ++ /* enabled by default */ ++ env->poll_control_msr = 1; + } + + void kvm_arch_do_init_vcpu(X86CPU *cpu) +@@ -1869,6 +1872,11 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) { + kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, env->steal_time_msr); + } ++ ++ if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_POLL_CONTROL)) { ++ kvm_msr_entry_add(cpu, MSR_KVM_POLL_CONTROL, env->poll_control_msr); ++ } ++ + if (has_architectural_pmu_version > 0) { + if (has_architectural_pmu_version > 1) { + /* Stop the counter. 
*/ +@@ -2241,6 +2249,9 @@ static int kvm_get_msrs(X86CPU *cpu) + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) { + kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, 0); + } ++ if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_POLL_CONTROL)) { ++ kvm_msr_entry_add(cpu, MSR_KVM_POLL_CONTROL, 1); ++ } + if (has_architectural_pmu_version > 0) { + if (has_architectural_pmu_version > 1) { + kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0); +@@ -2470,6 +2481,10 @@ static int kvm_get_msrs(X86CPU *cpu) + case MSR_KVM_STEAL_TIME: + env->steal_time_msr = msrs[i].data; + break; ++ case MSR_KVM_POLL_CONTROL: { ++ env->poll_control_msr = msrs[i].data; ++ break; ++ } + case MSR_CORE_PERF_FIXED_CTR_CTRL: + env->msr_fixed_ctr_ctrl = msrs[i].data; + break; +diff --git a/target/i386/machine.c b/target/i386/machine.c +index 9e7256a..52b1eae 100644 +--- a/target/i386/machine.c ++++ b/target/i386/machine.c +@@ -325,6 +325,14 @@ static bool steal_time_msr_needed(void *opaque) + return cpu->env.steal_time_msr != 0; + } + ++/* Poll control MSR enabled by default */ ++static bool poll_control_msr_needed(void *opaque) ++{ ++ X86CPU *cpu = opaque; ++ ++ return cpu->env.poll_control_msr != 1; ++} ++ + static const VMStateDescription vmstate_steal_time_msr = { + .name = "cpu/steal_time_msr", + .version_id = 1, +@@ -358,6 +366,17 @@ static const VMStateDescription vmstate_pv_eoi_msr = { + } + }; + ++static const VMStateDescription vmstate_poll_control_msr = { ++ .name = "cpu/poll_control_msr", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = poll_control_msr_needed, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT64(env.poll_control_msr, X86CPU), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + static bool fpop_ip_dp_needed(void *opaque) + { + X86CPU *cpu = opaque; +@@ -1033,6 +1052,7 @@ VMStateDescription vmstate_x86_cpu = { + &vmstate_async_pf_msr, + &vmstate_pv_eoi_msr, + &vmstate_steal_time_msr, ++ &vmstate_poll_control_msr, + &vmstate_fpop_ip_dp, + &vmstate_msr_tsc_adjust, + 
&vmstate_msr_tscdeadline, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch b/SOURCES/kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch new file mode 100644 index 0000000..fb1256b --- /dev/null +++ b/SOURCES/kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch @@ -0,0 +1,71 @@ +From 38633bfc500ac284a13f7da53ea5aabacb8006e1 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Tue, 4 Jun 2019 21:47:26 +0200 +Subject: [PATCH 07/23] i386: kvm: Disable arch_capabilities if MSR can't be + set + +RH-Author: plai@redhat.com +Message-id: <1559684847-10889-8-git-send-email-plai@redhat.com> +Patchwork-id: 88536 +O-Subject: [RHEL7.7 qemu-kvm-rhev PATCH v4 7/8] i386: kvm: Disable arch_capabilities if MSR can't be set +Bugzilla: 1709972 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Miroslav Rezanina + +From: Eduardo Habkost + +KVM has two bugs in the handling of MSR_IA32_ARCH_CAPABILITIES: + +1) Linux commit 1eaafe91a0df ("kvm: x86: IA32_ARCH_CAPABILITIES + is always supported") makes GET_SUPPORTED_CPUID return + arch_capabilities even if running on SVM. This makes "-cpu + host,migratable=off" incorrectly expose arch_capabilities on CPUID on + AMD hosts (where the MSR is not emulated by KVM). + +2) KVM_GET_MSR_INDEX_LIST does not return MSR_IA32_ARCH_CAPABILITIES if + the MSR is not supported by the host CPU. This makes QEMU not + initialize the MSR properly at kvm_put_msrs() on those hosts. + +Work around both bugs on the QEMU side, by checking if the MSR +was returned by KVM_GET_MSR_INDEX_LIST before returning the +feature flag on kvm_arch_get_supported_cpuid(). + +This has the unfortunate side effect of making arch_capabilities +unavailable on hosts without hardware support for the MSR until bug #2 +is fixed on KVM, but I can't see another way to work around bug #1 +without that side effect.
+ +Signed-off-by: Eduardo Habkost +Message-Id: <20190125220606.4864-2-ehabkost@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 485b1d256bcb0874bcde0223727c159b6837e6f8) +Signed-off-by: Paul Lai +Signed-off-by: Miroslav Rezanina +--- + target/i386/kvm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index c99c0ef..7afad93 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -374,6 +374,15 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + if (host_tsx_blacklisted()) { + ret &= ~(CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_HLE); + } ++ } else if (function == 7 && index == 0 && reg == R_EDX) { ++ /* ++ * Linux v4.17-v4.20 incorrectly return ARCH_CAPABILITIES on SVM hosts. ++ * We can detect the bug by checking if MSR_IA32_ARCH_CAPABILITIES is ++ * returned by KVM_GET_MSR_INDEX_LIST. ++ */ ++ if (!has_msr_arch_capabs) { ++ ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; ++ } + } else if (function == 0x80000001 && reg == R_ECX) { + /* + * It's safe to enable TOPOEXT even if it's not returned by +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-Cascadelake-.patch b/SOURCES/kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-Cascadelake-.patch new file mode 100644 index 0000000..f87afbd --- /dev/null +++ b/SOURCES/kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-Cascadelake-.patch @@ -0,0 +1,59 @@ +From e106165de465520a2bd1153dd41fe409e0157a77 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Fri, 4 Oct 2019 19:57:14 +0200 +Subject: [PATCH 4/4] i386: remove the 'INTEL_PT' CPUID bit from + Cascadelake-Server + +RH-Author: Eduardo Habkost +Message-id: <20191004195714.10176-4-ehabkost@redhat.com> +Patchwork-id: 90961 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH 3/3] i386: remove the 'INTEL_PT' CPUID bit from Cascadelake-Server +Bugzilla: 1638472 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Igor Mammedov + +From: Paolo Bonzini + +This is a partial cherry pick of upstream commit: + +commit 4c257911dcc7c4189768e9651755c849ce9db4e8 +Author: Paolo Bonzini +Date: Fri Dec 21 12:35:56 2018 +0100 + + i386: remove the 'INTEL_PT' CPUID bit from named CPU models + + Processor tracing is not yet implemented for KVM and it will be an + opt in feature requiring a special module parameter. + Disable it, because it is wrong to enable it by default and + it is impossible that no one has ever used it. + + Cc: qemu-stable@nongnu.org + Signed-off-by: Paolo Bonzini + +It includes only the Cascadelake-Server change, because the other +CPU models are not present in the RHEL7 tree. + +Signed-off-by: Eduardo Habkost +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index b377564..6a1d59c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2515,8 +2515,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | +- CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | +- CPUID_7_0_EBX_INTEL_PT, ++ CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512VNNI, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iscsi-Avoid-potential-for-get_status-overflow.patch b/SOURCES/kvm-iscsi-Avoid-potential-for-get_status-overflow.patch new file mode 100644 index 0000000..cbde759 --- /dev/null +++ b/SOURCES/kvm-iscsi-Avoid-potential-for-get_status-overflow.patch @@ -0,0 +1,61 @@ +From 5d61b8b267cb1d529681fd88a4538c8eee408812 Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Wed, 29 Jan 2020 18:47:14 +0100 +Subject: [PATCH 1/2] iscsi: Avoid potential for get_status overflow +MIME-Version: 1.0 +Content-Type: text/plain; 
charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200129184715.18876-2-jmaloy@redhat.com> +Patchwork-id: 93577 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH 1/2] iscsi: Avoid potential for get_status overflow +Bugzilla: 1794499 1794505 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Philippe Mathieu-Daudé + +From: Eric Blake + +Detected by Coverity: Multiplying two 32-bit int and assigning +the result to a 64-bit number is a risk of overflow. Prior to +the conversion to byte-based interfaces, the block layer took +care of ensuring that a status request never exceeded 2G in +the driver; but after that conversion, the block layer expects +drivers to deal with any size request (the driver can always +truncate the request size back down, as long as it makes +progress). So, in the off-chance that someone makes a large +request, we are at the mercy of whether iscsi_get_lba_status_task() +will cap things to at most INT_MAX / iscsilun->block_size when +it populates lbasd->num_blocks; since I could not easily audit +that, it's better to be safe than sorry by just forcing a 64-bit +multiply. 
+ +Fixes: 92809c36 +CC: qemu-stable@nongnu.org +Signed-off-by: Eric Blake +Message-Id: <20180508212718.1482663-1-eblake@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit 8ee1cef4593a7bda076891470c0620e79333c0d0) +Signed-off-by: Jon Maloy +Signed-off-by: Miroslav Rezanina +--- + block/iscsi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/iscsi.c b/block/iscsi.c +index c412b12..336ce49 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -734,7 +734,7 @@ retry: + goto out_unlock; + } + +- *pnum = lbasd->num_blocks * iscsilun->block_size; ++ *pnum = (int64_t) lbasd->num_blocks * iscsilun->block_size; + + if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || + lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch b/SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch new file mode 100644 index 0000000..0dd2ae8 --- /dev/null +++ b/SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch @@ -0,0 +1,79 @@ +From 899dabfb59004f7d69d244f836c250590c3574cd Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Wed, 29 Jan 2020 18:47:15 +0100 +Subject: [PATCH 2/2] iscsi: Cap block count from GET LBA STATUS + (CVE-2020-1711) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200129184715.18876-3-jmaloy@redhat.com> +Patchwork-id: 93576 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH 2/2] iscsi: Cap block count from GET LBA STATUS (CVE-2020-1711) +Bugzilla: 1794499 1794505 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Philippe Mathieu-Daudé + +From: Felipe Franciosi + +When querying an iSCSI server for the provisioning status of blocks (via +GET LBA STATUS), Qemu only validates that the response descriptor zero's +LBA matches the one requested. 
Given the SCSI spec allows servers to +respond with the status of blocks beyond the end of the LUN, Qemu may +have its heap corrupted by clearing/setting too many bits at the end of +its allocmap for the LUN. + +A malicious guest in control of the iSCSI server could carefully program +Qemu's heap (by selectively setting the bitmap) and then smash it. + +This limits the number of bits that iscsi_co_block_status() will try to +update in the allocmap so it can't overflow the bitmap. + +Fixes: CVE-2020-1711 +Cc: qemu-stable@nongnu.org +Signed-off-by: Felipe Franciosi +Signed-off-by: Peter Turschmid +Signed-off-by: Raphael Norwitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 693fd2acdf14dd86c0bf852610f1c2cca80a74dc) +Signed-off-by: Jon Maloy +Signed-off-by: Miroslav Rezanina +--- + block/iscsi.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/block/iscsi.c b/block/iscsi.c +index 336ce49..8ec97ab 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -671,7 +671,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, + struct scsi_get_lba_status *lbas = NULL; + struct scsi_lba_status_descriptor *lbasd = NULL; + struct IscsiTask iTask; +- uint64_t lba; ++ uint64_t lba, max_bytes; + int ret; + + iscsi_co_init_iscsitask(iscsilun, &iTask); +@@ -691,6 +691,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, + } + + lba = offset / iscsilun->block_size; ++ max_bytes = (iscsilun->num_blocks - lba) * iscsilun->block_size; + + qemu_mutex_lock(&iscsilun->mutex); + retry: +@@ -734,7 +735,7 @@ retry: + goto out_unlock; + } + +- *pnum = (int64_t) lbasd->num_blocks * iscsilun->block_size; ++ *pnum = MIN((int64_t) lbasd->num_blocks * iscsilun->block_size, max_bytes); + + if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || + lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-mc146818rtc-fix-timer-interrupt-reinjection-again.patch 
b/SOURCES/kvm-mc146818rtc-fix-timer-interrupt-reinjection-again.patch new file mode 100644 index 0000000..438b7c3 --- /dev/null +++ b/SOURCES/kvm-mc146818rtc-fix-timer-interrupt-reinjection-again.patch @@ -0,0 +1,133 @@ +From 224a226b03d2c0503915bd1c1139b37b56afd62d Mon Sep 17 00:00:00 2001 +From: Marcelo Tosatti +Date: Wed, 4 Dec 2019 15:21:09 +0100 +Subject: [PATCH 3/3] mc146818rtc: fix timer interrupt reinjection again + +RH-Author: Marcelo Tosatti +Message-id: <20191204152436.823942711@amt.cnet> +Patchwork-id: 92887 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH 3/3] mc146818rtc: fix timer interrupt reinjection again +Bugzilla: 1639098 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Vitaly Kuznetsov + +BZ: 1639098 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=24854309 +BRANCH: rhv7/master-2.12.0 +Upstream: 7a3e29b12f5afe0106a5713bb4db6e23dc66ef91 +of pbonzini's for-upstream tree. + +Commit 369b41359af46bded5799c9ef8be2b641d92e043 broke timer interrupt +reinjection when there is no period change by the guest. In that +case, old_period is 0, which ends up zeroing irq_coalesced (counter of +reinjected interrupts). + +The consequence is Windows 7 is unable to synchronize time via NTP. +Easily reproducible by playing a fullscreen video with cirrus and VNC. + +Fix by passing s->period when periodic_timer_update is called due to +expiration of the timer. With this change, old_period == 0 only +means that the periodic timer was off. + +Reported-by: Marcelo Tosatti +Co-developed-by: Marcelo Tosatti +Signed-off-by: Paolo Bonzini +Signed-off-by: Miroslav Rezanina +--- + hw/timer/mc146818rtc.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c +index d848911..7459040 100644 +--- a/hw/timer/mc146818rtc.c ++++ b/hw/timer/mc146818rtc.c +@@ -190,12 +190,14 @@ static uint32_t rtc_periodic_clock_ticks(RTCState *s) + * is just due to period adjustment. 
+ */ + static void +-periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period) ++periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period, bool period_change) + { + uint32_t period; + int64_t cur_clock, next_irq_clock, lost_clock = 0; + + period = rtc_periodic_clock_ticks(s); ++ s->period = period; ++ + if (!period) { + s->irq_coalesced = 0; + timer_del(s->periodic_timer); +@@ -210,7 +212,7 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period) + * if the periodic timer's update is due to period re-configuration, + * we should count the clock since last interrupt. + */ +- if (old_period) { ++ if (old_period && period_change) { + int64_t last_periodic_clock, next_periodic_clock; + + next_periodic_clock = muldiv64(s->next_periodic_time, +@@ -237,7 +239,6 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period) + if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { + uint32_t old_irq_coalesced = s->irq_coalesced; + +- s->period = period; + lost_clock += old_irq_coalesced * old_period; + s->irq_coalesced = lost_clock / s->period; + lost_clock %= s->period; +@@ -267,7 +268,7 @@ static void rtc_periodic_timer(void *opaque) + { + RTCState *s = opaque; + +- periodic_timer_update(s, s->next_periodic_time, 0); ++ periodic_timer_update(s, s->next_periodic_time, s->period, false); + s->cmos_data[RTC_REG_C] |= REG_C_PF; + if (s->cmos_data[RTC_REG_B] & REG_B_PIE) { + s->cmos_data[RTC_REG_C] |= REG_C_IRQF; +@@ -533,7 +534,7 @@ static void cmos_ioport_write(void *opaque, hwaddr addr, + + if (update_periodic_timer) { + periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), +- old_period); ++ old_period, true); + } + + check_update_timer(s); +@@ -572,7 +573,7 @@ static void cmos_ioport_write(void *opaque, hwaddr addr, + + if (update_periodic_timer) { + periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), +- old_period); ++ old_period, true); + } + + check_update_timer(s); +@@ -816,6 +817,7 @@ 
static int rtc_post_load(void *opaque, int version_id) + s->offset = 0; + check_update_timer(s); + } ++ s->period = rtc_periodic_clock_ticks(s); + + /* The periodic timer is deterministic in record/replay mode, + * so there is no need to update it after loading the vmstate. +@@ -825,7 +827,7 @@ static int rtc_post_load(void *opaque, int version_id) + uint64_t now = qemu_clock_get_ns(rtc_clock); + if (now < s->next_periodic_time || + now > (s->next_periodic_time + get_max_clock_jump())) { +- periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), 0); ++ periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), s->period, false); + } + } + +@@ -893,7 +895,7 @@ static void rtc_notify_clock_reset(Notifier *notifier, void *data) + int64_t now = *(int64_t *)data; + + rtc_set_date_from_host(ISA_DEVICE(s)); +- periodic_timer_update(s, now, 0); ++ periodic_timer_update(s, now, s->period, false); + check_update_timer(s); + + if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-mc146818rtc-fix-timer-interrupt-reinjection.patch b/SOURCES/kvm-mc146818rtc-fix-timer-interrupt-reinjection.patch new file mode 100644 index 0000000..bcbd958 --- /dev/null +++ b/SOURCES/kvm-mc146818rtc-fix-timer-interrupt-reinjection.patch @@ -0,0 +1,123 @@ +From ae11a1ea30f43abebd96a22988619eb9f52e6a4e Mon Sep 17 00:00:00 2001 +From: Marcelo Tosatti +Date: Wed, 4 Dec 2019 15:21:07 +0100 +Subject: [PATCH 1/3] mc146818rtc: fix timer interrupt reinjection + +RH-Author: Marcelo Tosatti +Message-id: <20191204152436.680299856@amt.cnet> +Patchwork-id: 92889 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH 1/3] mc146818rtc: fix timer interrupt reinjection +Bugzilla: 1639098 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Vitaly Kuznetsov + +BZ: 1639098 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=24854309 +BRANCH: rhv7/master-2.12.0 +Upstream: b429de730174b388ea5760e3debb0d542ea3c261 +of pbonzini's for-upstream tree. 
+ +commit 369b41359af46bded5799c9ef8be2b641d92e043 broke timer interrupt +reinjection when there is no period change by the guest. + +In that case, old_period is 0, which ends up zeroing irq_coalesced +(counter of reinjected interrupts). + +The consequence is Windows 7 is unable to synchronize time via NTP. +Easily reproducible by playing a fullscreen video with cirrus and VNC. + +Fix by not updating s->irq_coalesced when old_period is 0. + +V2: reorganize code (Paolo Bonzini) + +Signed-off-by: Marcelo Tosatti +Message-Id: <20191010123008.GA19158@amt.cnet> +Signed-off-by: Paolo Bonzini +Signed-off-by: Miroslav Rezanina +--- + hw/timer/mc146818rtc.c | 53 +++++++++++++++++++++++++------------------------- + 1 file changed, 27 insertions(+), 26 deletions(-) + +diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c +index 68c353f..296d974 100644 +--- a/hw/timer/mc146818rtc.c ++++ b/hw/timer/mc146818rtc.c +@@ -197,24 +197,28 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period) + + period = rtc_periodic_clock_ticks(s); + +- if (period) { +- /* compute 32 khz clock */ +- cur_clock = +- muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND); ++ if (!period) { ++ s->irq_coalesced = 0; ++ timer_del(s->periodic_timer); ++ return; ++ } + +- /* +- * if the periodic timer's update is due to period re-configuration, +- * we should count the clock since last interrupt. +- */ +- if (old_period) { +- int64_t last_periodic_clock, next_periodic_clock; +- +- next_periodic_clock = muldiv64(s->next_periodic_time, +- RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND); +- last_periodic_clock = next_periodic_clock - old_period; +- lost_clock = cur_clock - last_periodic_clock; +- assert(lost_clock >= 0); +- } ++ /* compute 32 khz clock */ ++ cur_clock = ++ muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND); ++ ++ /* ++ * if the periodic timer's update is due to period re-configuration, ++ * we should count the clock since last interrupt. 
++ */ ++ if (old_period) { ++ int64_t last_periodic_clock, next_periodic_clock; ++ ++ next_periodic_clock = muldiv64(s->next_periodic_time, ++ RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND); ++ last_periodic_clock = next_periodic_clock - old_period; ++ lost_clock = cur_clock - last_periodic_clock; ++ assert(lost_clock >= 0); + + /* + * s->irq_coalesced can change for two reasons: +@@ -245,22 +249,19 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period) + rtc_coalesced_timer_update(s); + } + } else { +- /* ++ /* + * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW + * is not used, we should make the time progress anyway. + */ + lost_clock = MIN(lost_clock, period); + } ++ } + +- assert(lost_clock >= 0 && lost_clock <= period); ++ assert(lost_clock >= 0 && lost_clock <= period); + +- next_irq_clock = cur_clock + period - lost_clock; +- s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1; +- timer_mod(s->periodic_timer, s->next_periodic_time); +- } else { +- s->irq_coalesced = 0; +- timer_del(s->periodic_timer); +- } ++ next_irq_clock = cur_clock + period - lost_clock; ++ s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1; ++ timer_mod(s->periodic_timer, s->next_periodic_time); + } + + static void rtc_periodic_timer(void *opaque) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch b/SOURCES/kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch new file mode 100644 index 0000000..7b5c874 --- /dev/null +++ b/SOURCES/kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch @@ -0,0 +1,97 @@ +From 95c9f56041b8d3b95644d790e2b3f59587805a01 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 17 Sep 2019 03:47:09 +0200 +Subject: [PATCH 3/4] migration: Do not re-read the clock on pre_save in case + of paused guest + +RH-Author: David Gibson +Message-id: <20190917034709.20167-1-dgibson@redhat.com> +Patchwork-id: 90473 +O-Subject: [RHEL-7.8 qemu-kvm-rhev 
PATCH] migration: Do not re-read the clock on pre_save in case of paused guest +Bugzilla: 1743508 +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier +RH-Acked-by: Dr. David Alan Gilbert + +From: "Maxiwell S. Garcia" + +Re-read the timebase before migrate was ported from x86 commit: + 6053a86fe7bd: kvmclock: reduce kvmclock difference on migration + +The clock move makes the guest know about the paused time between +the stop and migrate commands. This is an issue in an already-paused +VM because some side effects, like process stalls, could happen +after migration. + +So, this patch checks the runstate of guest in the pre_save handler and +does not re-read the timebase in case of paused state (cold migration). + +Signed-off-by: Maxiwell S. Garcia +Message-Id: <20190711194702.26598-1-maxiwell@linux.ibm.com> +Signed-off-by: David Gibson +(cherry picked from commit d14f33976282a8744ca1bf1d64e73996c145aa3f) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1743508 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23524287 +Testing: Booted a guest with the scratch qemu build + +Signed-off-by: David Gibson +Signed-off-by: Miroslav Rezanina +--- + hw/ppc/ppc.c | 13 +++++++++---- + target/ppc/cpu-qom.h | 1 + + 2 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c +index ec4be25..fb90d19 100644 +--- a/hw/ppc/ppc.c ++++ b/hw/ppc/ppc.c +@@ -877,6 +877,8 @@ static void timebase_save(PPCTimebase *tb) + * there is no need to update it from KVM here + */ + tb->guest_timebase = ticks + first_ppc_cpu->env.tb_env->tb_offset; ++ ++ tb->runstate_paused = runstate_check(RUN_STATE_PAUSED); + } + + static void timebase_load(PPCTimebase *tb) +@@ -923,9 +925,9 @@ void cpu_ppc_clock_vm_state_change(void *opaque, int running, + } + + /* +- * When migrating, read the clock just before migration, +- * so that the guest clock counts during the events +- * between: ++ * When migrating a running guest, read the clock just ++ *
before migration, so that the guest clock counts ++ * during the events between: + * + * * vm_stop() + * * +@@ -940,7 +942,10 @@ static int timebase_pre_save(void *opaque) + { + PPCTimebase *tb = opaque; + +- timebase_save(tb); ++ /* guest_timebase won't be overridden in case of paused guest */ ++ if (!tb->runstate_paused) { ++ timebase_save(tb); ++ } + + return 0; + } +diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h +index deaa46a..b900277 100644 +--- a/target/ppc/cpu-qom.h ++++ b/target/ppc/cpu-qom.h +@@ -210,6 +210,7 @@ typedef struct PowerPCCPUClass { + typedef struct PPCTimebase { + uint64_t guest_timebase; + int64_t time_of_the_day_ns; ++ bool runstate_paused; + } PPCTimebase; + + extern const struct VMStateDescription vmstate_ppc_timebase; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-mirror-Confirm-we-re-quiesced-only-if-the-job-is-pau.patch b/SOURCES/kvm-mirror-Confirm-we-re-quiesced-only-if-the-job-is-pau.patch new file mode 100644 index 0000000..04f1fc9 --- /dev/null +++ b/SOURCES/kvm-mirror-Confirm-we-re-quiesced-only-if-the-job-is-pau.patch @@ -0,0 +1,114 @@ +From ba6d773a79eef6f59687771bc5664814d96b6b03 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Mon, 9 Sep 2019 12:46:40 +0200 +Subject: [PATCH 4/4] mirror: Confirm we're quiesced only if the job is paused + or cancelled + +RH-Author: Sergio Lopez Pascual +Message-id: <20190909124640.53625-2-slp@redhat.com> +Patchwork-id: 90339 +O-Subject: [RHEL-7.7.z qemu-kvm-rhev PATCH 1/1] mirror: Confirm we're quiesced only if the job is paused or cancelled +Bugzilla: 1665256 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow + +While child_job_drained_begin() calls to job_pause(), the job doesn't +actually transition between states until it runs again and reaches a +pause point. This means bdrv_drained_begin() may return with some jobs +using the node still having 'busy == true'. 
+ +As a consequence, block_job_detach_aio_context() may get into a +deadlock, waiting for the job to be actually paused, while the coroutine +servicing the job is yielding and doesn't get the opportunity to get +scheduled again. This situation can be reproduced by issuing a +'block-commit' immediately followed by a 'device_del'. + +To ensure bdrv_drained_begin() only returns when the jobs have been +paused, we change mirror_drained_poll() to only confirm it's quiesced +when job->paused == true and there aren't any in-flight requests, except +if we reached that point by a drained section initiated by the +mirror/commit job itself. + +The other block jobs shouldn't need any changes, as the default +drained_poll() behavior is to only confirm it's quiesced if the job is +not busy or completed. + +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 5e771752a1ffba3a99d7d75b6d492b4a86b59e1b) +Signed-off-by: Sergio Lopez +Signed-off-by: Miroslav Rezanina +--- + block/mirror.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/block/mirror.c b/block/mirror.c +index 55dc94f..48c907f 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -72,6 +72,7 @@ typedef struct MirrorBlockJob { + int max_iov; + bool initial_zeroing_ongoing; + bool prepared; ++ bool in_drain; + } MirrorBlockJob; + + typedef struct MirrorOp { +@@ -551,6 +552,7 @@ static int mirror_exit_common(Job *job) + + /* The mirror job has no requests in flight any more, but we need to + * drain potential other users of the BDS before changing the graph. 
*/ ++ assert(s->in_drain); + bdrv_drained_begin(target_bs); + bdrv_replace_node(to_replace, target_bs, &local_err); + bdrv_drained_end(target_bs); +@@ -587,6 +589,7 @@ static int mirror_exit_common(Job *job) + blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort); + + bdrv_drained_end(src); ++ s->in_drain = false; + bdrv_unref(mirror_top_bs); + bdrv_unref(src); + +@@ -860,10 +863,12 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + */ + trace_mirror_before_drain(s, cnt); + ++ s->in_drain = true; + bdrv_drained_begin(bs); + cnt = bdrv_get_dirty_count(s->dirty_bitmap); + if (cnt > 0 || mirror_flush(s) < 0) { + bdrv_drained_end(bs); ++ s->in_drain = false; + continue; + } + +@@ -911,6 +916,7 @@ immediate_exit: + bdrv_dirty_iter_free(s->dbi); + + if (need_drain) { ++ s->in_drain = true; + bdrv_drained_begin(bs); + } + +@@ -979,6 +985,16 @@ static void mirror_pause(Job *job) + static bool mirror_drained_poll(BlockJob *job) + { + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); ++ ++ /* If the job isn't paused nor cancelled, we can't be sure that it won't ++ * issue more requests. We make an exception if we've reached this point ++ * from one of our own drain sections, to avoid a deadlock waiting for ++ * ourselves. 
++ */ ++ if (!s->common.job.paused && !s->common.job.cancelled && !s->in_drain) { ++ return true; ++ } ++ + return !!s->in_flight; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qdev-add-qdev_add_vm_change_state_handler.patch b/SOURCES/kvm-qdev-add-qdev_add_vm_change_state_handler.patch new file mode 100644 index 0000000..e5a2f73 --- /dev/null +++ b/SOURCES/kvm-qdev-add-qdev_add_vm_change_state_handler.patch @@ -0,0 +1,133 @@ +From 032fc7e1bc37a62ea45d77be9e96847a58bb25c9 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 16 Jul 2019 13:22:14 +0200 +Subject: [PATCH 21/23] qdev: add qdev_add_vm_change_state_handler() + +RH-Author: Stefan Hajnoczi +Message-id: <20190716132215.18503-3-stefanha@redhat.com> +Patchwork-id: 89537 +O-Subject: [RHEL-7.8 RHEL-7.7.z qemu-kvm-rhev PATCH 2/3] qdev: add qdev_add_vm_change_state_handler() +Bugzilla: 1673546 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: John Snow +RH-Acked-by: Kevin Wolf + +Children sometimes depend on their parent's vm change state handler +having completed. Add a vm change state handler API for devices that +guarantees tree depth ordering. 
+ +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit e965ffa70ac8ddc334dd5990f6907789bd9e6af6) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Miroslav Rezanina +--- + hw/core/Makefile.objs | 1 + + hw/core/vm-change-state-handler.c | 61 +++++++++++++++++++++++++++++++++++++++ + include/hw/qdev-core.h | 5 ++++ + 3 files changed, 67 insertions(+) + create mode 100644 hw/core/vm-change-state-handler.c + +diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs +index e967fb2..28531b0 100644 +--- a/hw/core/Makefile.objs ++++ b/hw/core/Makefile.objs +@@ -7,6 +7,7 @@ common-obj-$(CONFIG_SOFTMMU) += fw-path-provider.o + common-obj-y += irq.o + common-obj-y += hotplug.o + common-obj-$(CONFIG_SOFTMMU) += nmi.o ++common-obj-$(CONFIG_SOFTMMU) += vm-change-state-handler.o + + common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o + common-obj-$(CONFIG_XILINX_AXI) += stream.o +diff --git a/hw/core/vm-change-state-handler.c b/hw/core/vm-change-state-handler.c +new file mode 100644 +index 0000000..f814813 +--- /dev/null ++++ b/hw/core/vm-change-state-handler.c +@@ -0,0 +1,61 @@ ++/* ++ * qdev vm change state handlers ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, see . 
++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/qdev.h" ++ ++static int qdev_get_dev_tree_depth(DeviceState *dev) ++{ ++ int depth; ++ ++ for (depth = 0; dev; depth++) { ++ BusState *bus = dev->parent_bus; ++ ++ if (!bus) { ++ break; ++ } ++ ++ dev = bus->parent; ++ } ++ ++ return depth; ++} ++ ++/** ++ * qdev_add_vm_change_state_handler: ++ * @dev: the device that owns this handler ++ * @cb: the callback function to be invoked ++ * @opaque: user data passed to the callback function ++ * ++ * This function works like qemu_add_vm_change_state_handler() except callbacks ++ * are invoked in qdev tree depth order. Ordering is desirable when callbacks ++ * of children depend on their parent's callback having completed first. ++ * ++ * For example, when qdev_add_vm_change_state_handler() is used, a host ++ * controller's callback is invoked before the children on its bus when the VM ++ * starts running. The order is reversed when the VM stops running. ++ * ++ * Returns: an entry to be freed with qemu_del_vm_change_state_handler() ++ */ ++VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, ++ VMChangeStateHandler *cb, ++ void *opaque) ++{ ++ int depth = qdev_get_dev_tree_depth(dev); ++ ++ return qemu_add_vm_change_state_handler_prio(cb, opaque, depth); ++} +diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h +index 9453588..ff8bd5a 100644 +--- a/include/hw/qdev-core.h ++++ b/include/hw/qdev-core.h +@@ -6,6 +6,7 @@ + #include "qom/object.h" + #include "hw/irq.h" + #include "hw/hotplug.h" ++#include "sysemu/sysemu.h" + + enum { + DEV_NVECTORS_UNSPECIFIED = -1, +@@ -446,4 +447,8 @@ static inline bool qbus_is_hotpluggable(BusState *bus) + void device_listener_register(DeviceListener *listener); + void device_listener_unregister(DeviceListener *listener); + ++VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, ++ VMChangeStateHandler *cb, ++ void *opaque); ++ + #endif +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-qemu-img-Enable-BDRV_REQ_MAY_UNMAP-in-convert.patch b/SOURCES/kvm-qemu-img-Enable-BDRV_REQ_MAY_UNMAP-in-convert.patch new file mode 100644 index 0000000..0c7448b --- /dev/null +++ b/SOURCES/kvm-qemu-img-Enable-BDRV_REQ_MAY_UNMAP-in-convert.patch @@ -0,0 +1,103 @@ +From c018040299805135af45098641391a2818ddac8a Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Mon, 26 Aug 2019 09:19:44 +0200 +Subject: [PATCH 4/4] qemu-img: Enable BDRV_REQ_MAY_UNMAP in convert + +RH-Author: Maxim Levitsky +Message-id: <20190826091944.16113-2-mlevitsk@redhat.com> +Patchwork-id: 90164 +O-Subject: [RHEL-7.7.z qemu-kvm-rhev PATCH v3 1/1] qemu-img: Enable BDRV_REQ_MAY_UNMAP in convert +Bugzilla: 1648622 +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +From: Nir Soffer + +With Kevin's "block: Fix slow pre-zeroing in qemu-img convert"[1] +(commit c9fdcf202f, 'qemu-img: Use BDRV_REQ_NO_FALLBACK for +pre-zeroing') we skip the pre zero step called like this: + + blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) + +And we write zeroes later using: + + blk_co_pwrite_zeroes(s->target, + sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, 0); + +Since we use flags=0, this is translated to NBD_CMD_WRITE_ZEROES with +NBD_CMD_FLAG_NO_HOLE flag, which causes the NBD server to allocate space +instead of punching a hole.
+ +Here is an example failure: + +$ dd if=/dev/urandom of=src.img bs=1M count=5 +$ truncate -s 50m src.img +$ truncate -s 50m dst.img +$ nbdkit -f -v -e '' -U nbd.sock file file=dst.img + +$ ./qemu-img convert -n src.img nbd:unix:nbd.sock + +We can see in nbdkit log that it received the NBD_CMD_FLAG_NO_HOLE +(may_trim=0): + +nbdkit: file[1]: debug: newstyle negotiation: flags: export 0x4d +nbdkit: file[1]: debug: pwrite count=2097152 offset=0 +nbdkit: file[1]: debug: pwrite count=2097152 offset=2097152 +nbdkit: file[1]: debug: pwrite count=1048576 offset=4194304 +nbdkit: file[1]: debug: zero count=33554432 offset=5242880 may_trim=0 +nbdkit: file[1]: debug: zero count=13631488 offset=38797312 may_trim=0 +nbdkit: file[1]: debug: flush + +And the image became fully allocated: + +$ qemu-img info dst.img +virtual size: 50M (52428800 bytes) +disk size: 50M + +With this change we see that nbdkit did not receive the +NBD_CMD_FLAG_NO_HOLE (may_trim=1): + +nbdkit: file[1]: debug: newstyle negotiation: flags: export 0x4d +nbdkit: file[1]: debug: pwrite count=2097152 offset=0 +nbdkit: file[1]: debug: pwrite count=2097152 offset=2097152 +nbdkit: file[1]: debug: pwrite count=1048576 offset=4194304 +nbdkit: file[1]: debug: zero count=33554432 offset=5242880 may_trim=1 +nbdkit: file[1]: debug: zero count=13631488 offset=38797312 may_trim=1 +nbdkit: file[1]: debug: flush + +And the file is sparse as expected: + +$ qemu-img info dst.img +virtual size: 50M (52428800 bytes) +disk size: 5.0M + +[1] http://lists.nongnu.org/archive/html/qemu-block/2019-03/msg00761.html + +Signed-off-by: Nir Soffer +Signed-off-by: Kevin Wolf +(cherry picked from commit a3d6ae2299eaab1bced05551d0a0abfbcd9d08d0) +Signed-off-by: Maxim Levitsky +Signed-off-by: Miroslav Rezanina +--- + qemu-img.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/qemu-img.c b/qemu-img.c +index d588183..f7c1483 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -1738,7 +1738,8 @@ static int coroutine_fn 
convert_co_write(ImgConvertState *s, int64_t sector_num, + } + ret = blk_co_pwrite_zeroes(s->target, + sector_num << BDRV_SECTOR_BITS, +- n << BDRV_SECTOR_BITS, 0); ++ n << BDRV_SECTOR_BITS, ++ BDRV_REQ_MAY_UNMAP); + if (ret < 0) { + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qemu-img-Use-BDRV_REQ_NO_FALLBACK-for-pre-zeroing.patch b/SOURCES/kvm-qemu-img-Use-BDRV_REQ_NO_FALLBACK-for-pre-zeroing.patch new file mode 100644 index 0000000..627a596 --- /dev/null +++ b/SOURCES/kvm-qemu-img-Use-BDRV_REQ_NO_FALLBACK-for-pre-zeroing.patch @@ -0,0 +1,57 @@ +From 7a2aff323f83e6f98f81a2cbe59005e34a094f93 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 5 Jun 2019 13:57:04 +0200 +Subject: [PATCH 16/23] qemu-img: Use BDRV_REQ_NO_FALLBACK for pre-zeroing + +RH-Author: Maxim Levitsky +Message-id: <20190605135705.24526-9-mlevitsk@redhat.com> +Patchwork-id: 88558 +O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 8/9] qemu-img: Use BDRV_REQ_NO_FALLBACK for pre-zeroing +Bugzilla: 1648622 +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: John Snow + +From: Kevin Wolf + +If qemu-img convert sees that the target image isn't zero-initialised +yet, it tries to do an efficient zero write for the whole image first +to save the overhead of repeated explicit zero writes during the +conversion. Obviously, this provides only an advantage if the +pre-zeroing is actually efficient. Otherwise, we can end up writing +zeroes slowly while zeroing out the whole image, and then overwrite the +same blocks again with real data, potentially doubling the written data. + +Pass BDRV_REQ_NO_FALLBACK to blk_make_zero() to avoid this case. If we +can't efficiently zero out, we'll instead write explicit zeroes only if +there is no data to be written to a block. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Eric Blake + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1648622 +Signed-off-by: Maxim Levitsky + +(Cherry-picked from c9fdcf202f19fc2acdcb1ee0522ff5d61bf8c906, no conflicts) + +Signed-off-by: Miroslav Rezanina +--- + qemu-img.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/qemu-img.c b/qemu-img.c +index 5be2abf..d588183 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -1916,7 +1916,7 @@ static int convert_do_copy(ImgConvertState *s) + if (!s->has_zero_init && !s->target_has_backing && + bdrv_can_write_zeroes_with_unmap(blk_bs(s->target))) + { +- ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP); ++ ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK); + if (ret == 0) { + s->has_zero_init = true; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qemu-io-Add-write-n-for-BDRV_REQ_NO_FALLBACK.patch b/SOURCES/kvm-qemu-io-Add-write-n-for-BDRV_REQ_NO_FALLBACK.patch new file mode 100644 index 0000000..1f309f8 --- /dev/null +++ b/SOURCES/kvm-qemu-io-Add-write-n-for-BDRV_REQ_NO_FALLBACK.patch @@ -0,0 +1,87 @@ +From de5542b5a8dc74f413bf2f1b0d5f4b370aa6801b Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 5 Jun 2019 13:57:05 +0200 +Subject: [PATCH 17/23] qemu-io: Add write -n for BDRV_REQ_NO_FALLBACK + +RH-Author: Maxim Levitsky +Message-id: <20190605135705.24526-10-mlevitsk@redhat.com> +Patchwork-id: 88565 +O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 9/9] qemu-io: Add write -n for BDRV_REQ_NO_FALLBACK +Bugzilla: 1648622 +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: John Snow + +From: Kevin Wolf + +This makes the new BDRV_REQ_NO_FALLBACK flag available in the qemu-io +write command. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Eric Blake + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1648622 + +Signed-off-by: Maxim Levitsky +(Cherry picked from c6e3f520c802c5cb2de80576aba7f9f1fe985d8b) + +Signed-off-by: Miroslav Rezanina +--- + qemu-io-cmds.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c +index 9c51e57..ac8c533 100644 +--- a/qemu-io-cmds.c ++++ b/qemu-io-cmds.c +@@ -945,6 +945,7 @@ static void write_help(void) + " -b, -- write to the VM state rather than the virtual disk\n" + " -c, -- write compressed data with blk_write_compressed\n" + " -f, -- use Force Unit Access semantics\n" ++" -n, -- with -z, don't allow slow fallback\n" + " -p, -- ignored for backwards compatibility\n" + " -P, -- use different pattern to fill file\n" + " -C, -- report statistics in a machine parsable format\n" +@@ -963,7 +964,7 @@ static const cmdinfo_t write_cmd = { + .perm = BLK_PERM_WRITE, + .argmin = 2, + .argmax = -1, +- .args = "[-bcCfquz] [-P pattern] off len", ++ .args = "[-bcCfnquz] [-P pattern] off len", + .oneline = "writes a number of bytes at a specified offset", + .help = write_help, + }; +@@ -982,7 +983,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv) + int64_t total = 0; + int pattern = 0xcd; + +- while ((c = getopt(argc, argv, "bcCfpP:quz")) != -1) { ++ while ((c = getopt(argc, argv, "bcCfnpP:quz")) != -1) { + switch (c) { + case 'b': + bflag = true; +@@ -996,6 +997,9 @@ static int write_f(BlockBackend *blk, int argc, char **argv) + case 'f': + flags |= BDRV_REQ_FUA; + break; ++ case 'n': ++ flags |= BDRV_REQ_NO_FALLBACK; ++ break; + case 'p': + /* Ignored for backwards compatibility */ + break; +@@ -1036,6 +1040,11 @@ static int write_f(BlockBackend *blk, int argc, char **argv) + return -EINVAL; + } + ++ if ((flags & BDRV_REQ_NO_FALLBACK) && !zflag) { ++ printf("-n requires -z to be specified\n"); ++ return -EINVAL; ++ } ++ + if ((flags & BDRV_REQ_MAY_UNMAP) && 
!zflag) { + printf("-u requires -z to be specified\n"); + return -EINVAL; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qxl-check-release-info-object.patch b/SOURCES/kvm-qxl-check-release-info-object.patch new file mode 100644 index 0000000..a8173a0 --- /dev/null +++ b/SOURCES/kvm-qxl-check-release-info-object.patch @@ -0,0 +1,50 @@ +From 2e9c7edfee60b61e3b6b0c0eba29cb4df6d47f85 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Wed, 19 Jun 2019 16:05:49 +0200 +Subject: [PATCH 18/23] qxl: check release info object +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20190619160549.15731-2-philmd@redhat.com> +Patchwork-id: 88734 +O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 1/1] qxl: check release info object +Bugzilla: 1712704 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Stefan Hajnoczi + +From: Prasad J Pandit + +When releasing spice resources in release_resource() routine, +if release info object 'ext.info' is null, it leads to null +pointer dereference. Add check to avoid it. 
+ +Reported-by: Bugs SysSec +Signed-off-by: Prasad J Pandit +Message-id: 20190425063534.32747-1-ppandit@redhat.com +Signed-off-by: Gerd Hoffmann +(cherry picked from commit d52680fc932efb8a2f334cc6993e705ed1e31e99) +Signed-off-by: Miroslav Rezanina +--- + hw/display/qxl.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index a71714c..f757d2e 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -763,6 +763,9 @@ static void interface_release_resource(QXLInstance *sin, + QXLReleaseRing *ring; + uint64_t *item, id; + ++ if (!ext.info) { ++ return; ++ } + if (ext.group_id == MEMSLOT_GROUP_HOST) { + /* host group -> vga mode update request */ + QXLCommandExt *cmdext = (void *)(intptr_t)(ext.info->id); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch b/SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch new file mode 100644 index 0000000..9107ed4 --- /dev/null +++ b/SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch @@ -0,0 +1,71 @@ +From 2cca40617df140d2907a47fb58d15487cbf2af59 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 17 Jan 2020 11:49:41 +0100 +Subject: [PATCH 2/3] slirp: use correct size while emulating IRC commands +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20200117114942.12236-3-philmd@redhat.com> +Patchwork-id: 93392 +O-Subject: [RHEL-7.7.z qemu-kvm-rhev + RHEL-7.8 qemu-kvm-rhev + RHEL-7.9 qemu-kvm-rhev + RHEL-8.1.0 qemu-kvm + RHEL-8.2.0 qemu-kvm + RHEL-7.7.z qemu-kvm-ma + RHEL-7.8 qemu-kvm-ma + RHEL-7.9 qemu-kvm-ma PATCH 2/3] slirp: use correct size while emulating IRC commands +Bugzilla: 1791563 1791570 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth + +From: Prasad J Pandit + +While emulating IRC DCC commands, tcp_emu() uses 'mbuf' size 
+'m->m_size' to write DCC commands via snprintf(3). This may +lead to OOB write access, because 'bptr' points somewhere in +the middle of 'mbuf' buffer, not at the start. Use M_FREEROOM(m) +size to avoid OOB access. + +Reported-by: Vishnu Dev TJ +Signed-off-by: Prasad J Pandit +Reviewed-by: Samuel Thibault +Message-Id: <20200109094228.79764-2-ppandit@redhat.com> +(cherry picked from libslirp commit ce131029d6d4a405cb7d3ac6716d03e58fb4a5d9) +Signed-off-by: Philippe Mathieu-Daudé + +Signed-off-by: Miroslav Rezanina +--- + slirp/tcp_subr.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c +index decfd9b..b60310d 100644 +--- a/slirp/tcp_subr.c ++++ b/slirp/tcp_subr.c +@@ -783,7 +783,7 @@ tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), + "DCC CHAT chat %lu %u%c\n", + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), 1); +@@ -794,7 +794,7 @@ tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), + "DCC SEND %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); +@@ -805,7 +805,7 @@ tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), + "DCC MOVE %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch b/SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch new file mode 100644 index 0000000..979210b --- /dev/null +++ 
b/SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch @@ -0,0 +1,70 @@ +From 6d692d3045b7102db2a64ea95d040b1fa6277433 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 17 Jan 2020 11:49:42 +0100 +Subject: [PATCH 3/3] slirp: use correct size while emulating commands +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20200117114942.12236-4-philmd@redhat.com> +Patchwork-id: 93391 +O-Subject: [RHEL-7.7.z qemu-kvm-rhev + RHEL-7.8 qemu-kvm-rhev + RHEL-7.9 qemu-kvm-rhev + RHEL-8.1.0 qemu-kvm + RHEL-8.2.0 qemu-kvm + RHEL-7.7.z qemu-kvm-ma + RHEL-7.8 qemu-kvm-ma + RHEL-7.9 qemu-kvm-ma PATCH 3/3] slirp: use correct size while emulating commands +Bugzilla: 1791563 1791570 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth + +From: Prasad J Pandit + +While emulating services in tcp_emu(), it uses 'mbuf' size +'m->m_size' to write commands via snprintf(3). Use M_FREEROOM(m) +size to avoid possible OOB access. 
+ +Signed-off-by: Prasad J Pandit +Signed-off-by: Samuel Thibault +Message-Id: <20200109094228.79764-3-ppandit@redhat.com> +(cherry picked from libslirp commit 82ebe9c370a0e2970fb5695aa19aa5214a6a1c80) +Signed-off-by: Philippe Mathieu-Daudé + +Signed-off-by: Miroslav Rezanina +--- + slirp/tcp_subr.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c +index b60310d..b95ba23 100644 +--- a/slirp/tcp_subr.c ++++ b/slirp/tcp_subr.c +@@ -703,7 +703,7 @@ tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size - m->m_len, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), + "ORT %d,%d,%d,%d,%d,%d\r\n%s", + n1, n2, n3, n4, n5, n6, x==7?buff:""); + return 1; +@@ -736,7 +736,7 @@ tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size - m->m_len, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), + "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", + n1, n2, n3, n4, n5, n6, x==7?buff:""); + +@@ -762,8 +762,8 @@ tcp_emu(struct socket *so, struct mbuf *m) + if (m->m_data[m->m_len-1] == '\0' && lport != 0 && + (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, + htons(lport), SS_FACCEPTONCE)) != NULL) +- m->m_len = snprintf(m->m_data, m->m_size, "%d", +- ntohs(so->so_fport)) + 1; ++ m->m_len = snprintf(m->m_data, M_ROOM(m), ++ "%d", ntohs(so->so_fport)) + 1; + return 1; + + case EMU_IRC: +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-Export-TAA_NO-bit-to-guests.patch b/SOURCES/kvm-target-i386-Export-TAA_NO-bit-to-guests.patch new file mode 100644 index 0000000..aac0dc0 --- /dev/null +++ b/SOURCES/kvm-target-i386-Export-TAA_NO-bit-to-guests.patch @@ -0,0 +1,48 @@ +From 7aa728ae021e3b29fb5903ae0ff894a5bd40bbdc Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Tue, 17 Dec 2019 22:23:41 +0100 +Subject: 
[PATCH 1/2] target/i386: Export TAA_NO bit to guests + +RH-Author: Eduardo Habkost +Message-id: <20191217222342.1939034-2-ehabkost@redhat.com> +Patchwork-id: 93164 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH 1/2] target/i386: Export TAA_NO bit to guests +Bugzilla: 1779530 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Igor Mammedov +RH-Acked-by: Miroslav Rezanina + +From: Pawan Gupta + +TSX Async Abort (TAA) is a side channel attack on internal buffers in +some Intel processors similar to Microarchitectural Data Sampling (MDS). + +Some future Intel processors will use the ARCH_CAP_TAA_NO bit in the +IA32_ARCH_CAPABILITIES MSR to report that they are not vulnerable to +TAA. Make this bit available to guests. + +Signed-off-by: Pawan Gupta +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7fac38635e1cc5ebae34eb6530da1009bd5808e4) +Signed-off-by: Eduardo Habkost +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6a1d59c..8d03d0e 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1148,7 +1148,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .feat_names = { + "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", + "ssb-no", "mds-no", NULL, NULL, +- NULL, NULL, NULL, NULL, ++ "taa-no", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-add-MDS-NO-feature.patch b/SOURCES/kvm-target-i386-add-MDS-NO-feature.patch new file mode 100644 index 0000000..7c2f886 --- /dev/null +++ b/SOURCES/kvm-target-i386-add-MDS-NO-feature.patch @@ -0,0 +1,54 @@ +From b565bf8ec21b5aa9cc4cbadeca016bd7989a1a6c Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Tue, 13 Aug 2019 02:34:40 +0200 +Subject: [PATCH 2/4] target/i386: add MDS-NO feature + +RH-Author: Eduardo Habkost +Message-id: <20190813023440.3565-1-ehabkost@redhat.com> 
+Patchwork-id: 89947 +O-Subject: [RHEL-7.7.z qemu-kvm-rhev PATCH] target/i386: add MDS-NO feature +Bugzilla: 1716726 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Igor Mammedov +RH-Acked-by: Bandan Das + +From: Paolo Bonzini + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716726 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23024015 + +Microarchitectural Data Sampling is a hardware vulnerability which allows +unprivileged speculative access to data which is available in various CPU +internal buffers. + +Some Intel processors use the ARCH_CAP_MDS_NO bit in the +IA32_ARCH_CAPABILITIES +MSR to report that they are not vulnerable, make it available to guests. + +Signed-off-by: Paolo Bonzini +Message-Id: <20190516185320.28340-1-pbonzini@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 20140a82c67467f53814ca197403d5e1b561a5e5) +Signed-off-by: Eduardo Habkost +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 928e53c..5d6b45b 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1147,7 +1147,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", +- "ssb-no", NULL, NULL, NULL, ++ "ssb-no", "mds-no", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-add-support-for-MSR_IA32_TSX_CTRL.patch b/SOURCES/kvm-target-i386-add-support-for-MSR_IA32_TSX_CTRL.patch new file mode 100644 index 0000000..2d5c95e --- /dev/null +++ b/SOURCES/kvm-target-i386-add-support-for-MSR_IA32_TSX_CTRL.patch @@ -0,0 +1,161 @@ +From d84d88a3036a0d5db9b19a1611158946cd362603 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Tue, 17 Dec 2019 22:23:42 +0100 +Subject: [PATCH 2/2] 
target/i386: add support for MSR_IA32_TSX_CTRL + +RH-Author: Eduardo Habkost +Message-id: <20191217222342.1939034-3-ehabkost@redhat.com> +Patchwork-id: 93165 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH 2/2] target/i386: add support for MSR_IA32_TSX_CTRL +Bugzilla: 1779530 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Igor Mammedov +RH-Acked-by: Miroslav Rezanina + +From: Paolo Bonzini + +The MSR_IA32_TSX_CTRL MSR can be used to hide TSX (also known as the +Trusty Side-channel Extension). By virtualizing the MSR, KVM guests +can disable TSX and avoid paying the price of mitigating TSX-based +attacks on microarchitectural side channels. + +Reviewed-by: Eduardo Habkost +Signed-off-by: Paolo Bonzini +(cherry picked from commit 2a9758c51e2c2d13fc3845c3d603c11df98b8823) +Signed-off-by: Eduardo Habkost +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 4 ++++ + target/i386/kvm.c | 13 +++++++++++++ + target/i386/machine.c | 20 ++++++++++++++++++++ + 4 files changed, 38 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 8d03d0e..4d87879 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1147,7 +1147,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", +- "ssb-no", "mds-no", NULL, NULL, ++ "ssb-no", "mds-no", NULL, "tsx-ctrl", + "taa-no", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 095e695..65c4fda 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -354,6 +354,9 @@ typedef enum X86Seg { + #define MSR_VIRT_SSBD 0xc001011f + #define MSR_IA32_PRED_CMD 0x49 + #define MSR_IA32_ARCH_CAPABILITIES 0x10a ++#define ARCH_CAP_TSX_CTRL_MSR (1<<7) ++ ++#define MSR_IA32_TSX_CTRL 0x122 + #define MSR_IA32_TSCDEADLINE 0x6e0 + + #define FEATURE_CONTROL_LOCKED (1<<0) +@@ -1221,6 +1224,7 @@ typedef 
struct CPUX86State { + uint64_t msr_smi_count; + + uint32_t pkru; ++ uint32_t tsx_ctrl; + + uint64_t spec_ctrl; + uint64_t virt_ssbd; +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 72901e1..a6e5a87 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -92,6 +92,7 @@ static bool has_msr_hv_stimer; + static bool has_msr_hv_frequencies; + static bool has_msr_xss; + static bool has_msr_spec_ctrl; ++static bool has_msr_tsx_ctrl; + static bool has_msr_virt_ssbd; + static bool has_msr_smi_count; + static bool has_msr_arch_capabs; +@@ -1340,6 +1341,9 @@ static int kvm_get_supported_msrs(KVMState *s) + case MSR_IA32_SPEC_CTRL: + has_msr_spec_ctrl = true; + break; ++ case MSR_IA32_TSX_CTRL: ++ has_msr_tsx_ctrl = true; ++ break; + case MSR_VIRT_SSBD: + has_msr_virt_ssbd = true; + break; +@@ -1836,6 +1840,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + if (has_msr_spec_ctrl) { + kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, env->spec_ctrl); + } ++ if (has_msr_tsx_ctrl) { ++ kvm_msr_entry_add(cpu, MSR_IA32_TSX_CTRL, env->tsx_ctrl); ++ } + if (has_msr_virt_ssbd) { + kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, env->virt_ssbd); + } +@@ -2222,6 +2229,9 @@ static int kvm_get_msrs(X86CPU *cpu) + if (has_msr_spec_ctrl) { + kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, 0); + } ++ if (has_msr_tsx_ctrl) { ++ kvm_msr_entry_add(cpu, MSR_IA32_TSX_CTRL, 0); ++ } + if (has_msr_virt_ssbd) { + kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, 0); + } +@@ -2597,6 +2607,9 @@ static int kvm_get_msrs(X86CPU *cpu) + case MSR_IA32_SPEC_CTRL: + env->spec_ctrl = msrs[i].data; + break; ++ case MSR_IA32_TSX_CTRL: ++ env->tsx_ctrl = msrs[i].data; ++ break; + case MSR_VIRT_SSBD: + env->virt_ssbd = msrs[i].data; + break; +diff --git a/target/i386/machine.c b/target/i386/machine.c +index 52b1eae..6a2d761 100644 +--- a/target/i386/machine.c ++++ b/target/i386/machine.c +@@ -954,6 +954,25 @@ static const VMStateDescription vmstate_msr_virt_ssbd = { + } + }; + ++static bool msr_tsx_ctrl_needed(void *opaque) 
++{ ++ X86CPU *cpu = opaque; ++ CPUX86State *env = &cpu->env; ++ ++ return env->features[FEAT_ARCH_CAPABILITIES] & ARCH_CAP_TSX_CTRL_MSR; ++} ++ ++static const VMStateDescription vmstate_msr_tsx_ctrl = { ++ .name = "cpu/msr_tsx_ctrl", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = msr_tsx_ctrl_needed, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT32(env.tsx_ctrl, X86CPU), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + VMStateDescription vmstate_x86_cpu = { + .name = "cpu", + .version_id = 12, +@@ -1079,6 +1098,7 @@ VMStateDescription vmstate_x86_cpu = { + &vmstate_msr_intel_pt, + &vmstate_xsave, + &vmstate_msr_virt_ssbd, ++ &vmstate_msr_tsx_ctrl, + NULL + } + }; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-tcp_emu-Fix-oob-access.patch b/SOURCES/kvm-tcp_emu-Fix-oob-access.patch new file mode 100644 index 0000000..6c2e897 --- /dev/null +++ b/SOURCES/kvm-tcp_emu-Fix-oob-access.patch @@ -0,0 +1,60 @@ +From 66013de2e0075ae67edd31efb3e86c728ec485fa Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 17 Jan 2020 11:49:40 +0100 +Subject: [PATCH 1/3] tcp_emu: Fix oob access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20200117114942.12236-2-philmd@redhat.com> +Patchwork-id: 93393 +O-Subject: [RHEL-7.7.z qemu-kvm-rhev + RHEL-7.8 qemu-kvm-rhev + RHEL-7.9 qemu-kvm-rhev + RHEL-8.1.0 qemu-kvm + RHEL-8.2.0 qemu-kvm + RHEL-7.7.z qemu-kvm-ma + RHEL-7.8 qemu-kvm-ma + RHEL-7.9 qemu-kvm-ma PATCH 1/3] tcp_emu: Fix oob access +Bugzilla: 1791563 1791570 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth + +From: Samuel Thibault + +The main loop only checks for one available byte, while we sometimes +need two bytes. 
+ +(cherry picked from libslirp commit 2655fffed7a9e765bcb4701dd876e9dab975f289) +[PMD: backported with style conflicts, + CHANGELOG.md absent in downstream] +Signed-off-by: Philippe Mathieu-Daudé + +Signed-off-by: Miroslav Rezanina +--- + slirp/tcp_subr.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c +index 0152f72..decfd9b 100644 +--- a/slirp/tcp_subr.c ++++ b/slirp/tcp_subr.c +@@ -892,6 +892,9 @@ tcp_emu(struct socket *so, struct mbuf *m) + break; + + case 5: ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ + /* + * The difference between versions 1.0 and + * 2.0 is here. For future versions of +@@ -907,6 +910,10 @@ tcp_emu(struct socket *so, struct mbuf *m) + /* This is the field containing the port + * number that RA-player is listening to. + */ ++ ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ + lport = (((u_char*)bptr)[0] << 8) + + ((u_char *)bptr)[1]; + if (lport < 6970) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-usb-drop-unnecessary-usb_device_post_load-checks.patch b/SOURCES/kvm-usb-drop-unnecessary-usb_device_post_load-checks.patch new file mode 100644 index 0000000..2fbc14e --- /dev/null +++ b/SOURCES/kvm-usb-drop-unnecessary-usb_device_post_load-checks.patch @@ -0,0 +1,127 @@ +From ed9c0b1c244cb299cb0aa52b0dc93979ca503abd Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 1 Oct 2019 19:01:06 +0200 +Subject: [PATCH 1/4] usb: drop unnecessary usb_device_post_load checks + +RH-Author: Dr. David Alan Gilbert +Message-id: <20191001190106.30455-2-dgilbert@redhat.com> +Patchwork-id: 90937 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH 1/1] usb: drop unnecessary usb_device_post_load checks +Bugzilla: 1724048 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Igor Mammedov +RH-Acked-by: Stefan Hajnoczi + +From: Jonathan Davies + +In usb_device_post_load, certain values of dev->setup_len or +dev->setup_index can cause -EINVAL to be returned. 
One example is when +setup_len exceeds 4096, the hard-coded value of sizeof(dev->data_buf). +This can happen through legitimate guest activity and will cause all +subsequent attempts to migrate the guest to fail in vmstate_load_state. + +The values of these variables can be set by USB packets originating in +the guest. There are two ways in which they can be set: in +do_token_setup and in do_parameter in hw/usb/core.c. + +It is easy to craft a USB packet in a guest that causes do_token_setup +to set setup_len to a value larger than 4096. When this has been done +once, all subsequent attempts to migrate the VM will fail in +usb_device_post_load until the VM is next power-cycled or a +smaller-sized USB packet is sent to the device. + +Sample code for achieving this in a VM started with "-device usb-tablet" +running Linux with CONFIG_HIDRAW=y and HID_MAX_BUFFER_SIZE > 4096: + + #include + #include + #include + #include + + int main() { + char buf[4097]; + int fd = open("/dev/hidraw0", O_RDWR|O_NONBLOCK); + + buf[0] = 0x1; + write(fd, buf, 4097); + + return 0; + } + +When this code is run in the VM, qemu will output: + + usb_generic_handle_packet: ctrl buffer too small (4097 > 4096) + +A subsequent attempt to migrate the VM will fail and output the +following on the destination host: + + qemu-kvm: error while loading state for instance 0x0 of device '0000:00:06.7/1/usb-ptr' + qemu-kvm: load of migration failed: Invalid argument + +The idea behind checking the values of setup_len and setup_index before +they are used is correct, but doing it in usb_device_post_load feels +arbitrary, and will cause unnecessary migration failures. Indeed, none +of the commit messages for c60174e8, 9f8e9895 and 719ffe1f justify why +post_load is the right place to do these checks. They correctly point +out that the important thing to protect is the usb_packet_copy. + +Instead, the right place to do the checks is in do_token_setup and +do_parameter. 
Indeed, there are already some checks here. We can examine +each of the disjuncts currently tested in usb_device_post_load to see +whether any need adding to do_token_setup or do_parameter to improve +safety there: + + * dev->setup_index < 0 + - This test is not needed because setup_index is explicitly set to +0 in do_token_setup and do_parameter. + + * dev->setup_len < 0 + - In both do_token_setup and do_parameter, the value of setup_len +is computed by (s->setup_buf[7] << 8) | s->setup_buf[6]. Since +s->setup_buf is a byte array and setup_len is an int32_t, it's +impossible for this arithmetic to set setup_len's top bit, so it can +never be negative. + + * dev->setup_index > dev->setup_len + - Since setup_index is 0, this is equivalent to the previous test, +so is redundant. + + * dev->setup_len > sizeof(dev->data_buf) + - This condition is already explicitly checked in both +do_token_setup and do_parameter. + +Hence there is no need to bolster the existing checks in do_token_setup +or do_parameter, and we can safely remove these checks from +usb_device_post_load without reducing safety but allowing migrations to +proceed regardless of what USB packets have been generated by the guest. + +Signed-off-by: Jonathan Davies +Message-Id: <20190107175117.23769-1-jonathan.davies@nutanix.com> +Signed-off-by: Gerd Hoffmann +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit f30815390adb1ec153327c3832ab378e8bce9808) +Signed-off-by: Miroslav Rezanina +--- + hw/usb/bus.c | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/hw/usb/bus.c b/hw/usb/bus.c +index 11f7720..5499810 100644 +--- a/hw/usb/bus.c ++++ b/hw/usb/bus.c +@@ -59,12 +59,6 @@ static int usb_device_post_load(void *opaque, int version_id) + } else { + dev->attached = true; + } +- if (dev->setup_index < 0 || +- dev->setup_len < 0 || +- dev->setup_index > dev->setup_len || +- dev->setup_len > sizeof(dev->data_buf)) { +- return -EINVAL; +- } + return 0; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-fix-vhost_log-size-overflow-during-migration.patch b/SOURCES/kvm-vhost-fix-vhost_log-size-overflow-during-migration.patch new file mode 100644 index 0000000..52c20c1 --- /dev/null +++ b/SOURCES/kvm-vhost-fix-vhost_log-size-overflow-during-migration.patch @@ -0,0 +1,81 @@ +From f55e32a7892964908252bb5fe3719bb22c2de2dd Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 26 Nov 2019 13:27:55 +0100 +Subject: [PATCH] vhost: fix vhost_log size overflow during migration + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20191126132755.42248-2-dgilbert@redhat.com> +Patchwork-id: 92686 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH 1/1] vhost: fix vhost_log size overflow during migration +Bugzilla: 1775251 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Peter Xu +RH-Acked-by: Juan Quintela + +From: Li Hangjing + +When a guest which doesn't support multiqueue is migrated with a multi queues +vhost-user-blk device, a crash will occur like: + +0 qemu_memfd_alloc (name=, size=562949953421312, seals=, fd=0x7f87171fe8b4, errp=0x7f87171fe8a8) at util/memfd.c:153 +1 0x00007f883559d7cf in vhost_log_alloc (size=70368744177664, share=true) at hw/virtio/vhost.c:186 +2 0x00007f88355a0758 in vhost_log_get (listener=0x7f8838bd7940, enable=1) at qemu-2-12/hw/virtio/vhost.c:211 +3 vhost_dev_log_resize (listener=0x7f8838bd7940, enable=1) at hw/virtio/vhost.c:263 +4 vhost_migration_log (listener=0x7f8838bd7940, enable=1) at hw/virtio/vhost.c:787 +5 0x00007f88355463d6 in memory_global_dirty_log_start () at memory.c:2503 +6 0x00007f8835550577 in ram_init_bitmaps (f=0x7f88384ce600, opaque=0x7f8836024098) at migration/ram.c:2173 +7 ram_init_all (f=0x7f88384ce600, opaque=0x7f8836024098) at migration/ram.c:2192 +8 ram_save_setup (f=0x7f88384ce600, opaque=0x7f8836024098) at migration/ram.c:2219 +9 0x00007f88357a419d in qemu_savevm_state_setup (f=0x7f88384ce600) at migration/savevm.c:1002 +10 0x00007f883579fc3e in migration_thread (opaque=0x7f8837530400) at migration/migration.c:2382 +11 0x00007f8832447893 in start_thread () from /lib64/libpthread.so.0 +12 0x00007f8832178bfd in clone () from /lib64/libc.so.6 + +This is because vhost_get_log_size() returns an overflowed vhost-log size. +In this function, it uses the uninitialized variable vqs->used_phys and +vqs->used_size to get the vhost-log size. + +Signed-off-by: Li Hangjing +Reviewed-by: Xie Yongji +Reviewed-by: Chai Wen +Message-Id: <20190603061524.24076-1-lihangjing@baidu.com> +Cc: qemu-stable@nongnu.org +Reviewed-by: Michael S. 
Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 240e647a14df9677b3a501f7b8b870e40aac3fd5) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 1ae68ff..7bdc9c4 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -131,6 +131,11 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, + } + for (i = 0; i < dev->nvqs; ++i) { + struct vhost_virtqueue *vq = dev->vqs + i; ++ ++ if (!vq->used_phys && !vq->used_size) { ++ continue; ++ } ++ + vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys, + range_get_last(vq->used_phys, vq->used_size)); + } +@@ -168,6 +173,11 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) + } + for (i = 0; i < dev->nvqs; ++i) { + struct vhost_virtqueue *vq = dev->vqs + i; ++ ++ if (!vq->used_phys && !vq->used_size) { ++ continue; ++ } ++ + uint64_t last = vq->used_phys + vq->used_size - 1; + log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-scsi-fixed-virtio_scsi_ctx_check-failed-when-.patch b/SOURCES/kvm-virtio-scsi-fixed-virtio_scsi_ctx_check-failed-when-.patch new file mode 100644 index 0000000..0026d56 --- /dev/null +++ b/SOURCES/kvm-virtio-scsi-fixed-virtio_scsi_ctx_check-failed-when-.patch @@ -0,0 +1,144 @@ +From ed8f16b1786f28d3fb6c6cef79cd4f94deae8857 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Thu, 28 Nov 2019 11:30:47 +0100 +Subject: [PATCH] virtio-scsi: fixed virtio_scsi_ctx_check failed when + detaching scsi disk +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Sergio Lopez Pascual +Message-id: <20191128113047.253669-2-slp@redhat.com> +Patchwork-id: 92747 +O-Subject: [RHEL-7.8 qemu-kvm-rhev PATCH v2 1/1] virtio-scsi: fixed virtio_scsi_ctx_check failed when detaching scsi disk +Bugzilla: 1764120 +RH-Acked-by: Markus Armbruster 
+RH-Acked-by: Max Reitz +RH-Acked-by: Maxim Levitsky + +From: Zhengui li + +commit a6f230c moved the blockbackend back to the main AioContext on unplug. It set the AioContext of +SCSIDevice to the main AioContext, but s->ctx is still the iothread AioContext (if the scsi controller +is configured with iothread). So if there are in-flight requests during unplug, a failing assertion +happened. The bt is below: +(gdb) bt +#0 0x0000ffff86aacbd0 in raise () from /lib64/libc.so.6 +#1 0x0000ffff86aadf7c in abort () from /lib64/libc.so.6 +#2 0x0000ffff86aa6124 in __assert_fail_base () from /lib64/libc.so.6 +#3 0x0000ffff86aa61a4 in __assert_fail () from /lib64/libc.so.6 +#4 0x0000000000529118 in virtio_scsi_ctx_check (d=, s=, s=) at /home/qemu-4.0.0/hw/scsi/virtio-scsi.c:246 +#5 0x0000000000529ec4 in virtio_scsi_handle_cmd_req_prepare (s=0x2779ec00, req=0xffff740397d0) at /home/qemu-4.0.0/hw/scsi/virtio-scsi.c:559 +#6 0x000000000052a228 in virtio_scsi_handle_cmd_vq (s=0x2779ec00, vq=0xffff7c6d7110) at /home/qemu-4.0.0/hw/scsi/virtio-scsi.c:603 +#7 0x000000000052afa8 in virtio_scsi_data_plane_handle_cmd (vdev=, vq=0xffff7c6d7110) at /home/qemu-4.0.0/hw/scsi/virtio-scsi-dataplane.c:59 +#8 0x000000000054d94c in virtio_queue_host_notifier_aio_poll (opaque=) at /home/qemu-4.0.0/hw/virtio/virtio.c:2452 + +assert(blk_get_aio_context(d->conf.blk) == s->ctx) failed. + +To avoid the assertion failure, move the "if" after qdev_simple_device_unplug_cb. + +In addition, to avoid another qemu crash below, add aio_disable_external before +qdev_simple_device_unplug_cb, which disables the further processing of external clients +when doing qdev_simple_device_unplug_cb. 
+(gdb) bt +#0 scsi_req_unref (req=0xffff6802c6f0) at hw/scsi/scsi-bus.c:1283 +#1 0x00000000005294a4 in virtio_scsi_handle_cmd_req_submit (req=, + s=) at /home/qemu-4.0.0/hw/scsi/virtio-scsi.c:589 +#2 0x000000000052a2a8 in virtio_scsi_handle_cmd_vq (s=s@entry=0x9c90e90, + vq=vq@entry=0xffff7c05f110) at /home/qemu-4.0.0/hw/scsi/virtio-scsi.c:625 +#3 0x000000000052afd8 in virtio_scsi_data_plane_handle_cmd (vdev=, + vq=0xffff7c05f110) at /home/qemu-4.0.0/hw/scsi/virtio-scsi-dataplane.c:60 +#4 0x000000000054d97c in virtio_queue_host_notifier_aio_poll (opaque=) + at /home/qemu-4.0.0/hw/virtio/virtio.c:2447 +#5 0x00000000009b204c in run_poll_handlers_once (ctx=ctx@entry=0x6efea40, + timeout=timeout@entry=0xffff7d7f7308) at util/aio-posix.c:521 +#6 0x00000000009b2b64 in run_poll_handlers (ctx=ctx@entry=0x6efea40, + max_ns=max_ns@entry=4000, timeout=timeout@entry=0xffff7d7f7308) at util/aio-posix.c:559 +#7 0x00000000009b2ca0 in try_poll_mode (ctx=ctx@entry=0x6efea40, timeout=0xffff7d7f7308, + timeout@entry=0xffff7d7f7348) at util/aio-posix.c:594 +#8 0x00000000009b31b8 in aio_poll (ctx=0x6efea40, blocking=blocking@entry=true) + at util/aio-posix.c:636 +#9 0x00000000006973cc in iothread_run (opaque=0x6ebd800) at iothread.c:75 +#10 0x00000000009b592c in qemu_thread_start (args=0x6efef60) at util/qemu-thread-posix.c:502 +#11 0x0000ffff8057f8bc in start_thread () from /lib64/libpthread.so.0 +#12 0x0000ffff804e5f8c in thread_start () from /lib64/libc.so.6 +(gdb) p bus +$1 = (SCSIBus *) 0x0 + +Signed-off-by: Zhengui li +Message-Id: <1563696502-7972-1-git-send-email-lizhengui@huawei.com> +Signed-off-by: Paolo Bonzini +Message-Id: <1563829520-17525-1-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9c5aad84da1c37429d06c193f23a8df6445ed29e) +[slp: we need to get a reference to the underlying BDS to be able +to switch the context after calling qdev_simple_device_unplug_cb(), +as in 2.12 this causes the SCSIDevice to go away immediately] 
+Signed-off-by: Sergio Lopez + +Signed-off-by: Miroslav Rezanina +--- + hw/scsi/virtio-scsi.c | 30 ++++++++++++++++++++++++++---- + 1 file changed, 26 insertions(+), 4 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 391500b..2bd54b3 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -20,6 +20,7 @@ + #include "qemu/error-report.h" + #include "qemu/iov.h" + #include "sysemu/block-backend.h" ++#include "sysemu/blockdev.h" + #include "hw/scsi/scsi.h" + #include "scsi/constants.h" + #include "hw/virtio/virtio-bus.h" +@@ -839,6 +840,9 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, + VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + SCSIDevice *sd = SCSI_DEVICE(dev); ++ AioContext *ctx = s->ctx ?: qemu_get_aio_context(); ++ BlockDriverState *bs; ++ DriveInfo *dinfo; + + if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { + virtio_scsi_acquire(s); +@@ -848,13 +852,31 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, + virtio_scsi_release(s); + } + +- if (s->ctx) { ++ /* ++ * This SCSIDevice goes away after calling qdev_simple_device_unplug_cb(), ++ * so get a reference to the underlying BDS here to be able to switch ++ * its AioContext afterwards. ++ */ ++ bs = blk_bs(sd->conf.blk); ++ ++ /* ++ * Drives attached to a legacy device will get auto deleted while ++ * unplugging the latter, so we don't need to switch their context. ++ * Get a reference to dinfo here, which is only NULL for non-legacy ++ * devices, and use it to avoid doing the switch for drives attached ++ * to legacy devices. 
++ */ ++ dinfo = blk_legacy_dinfo(sd->conf.blk); ++ ++ aio_disable_external(ctx); ++ qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); ++ aio_enable_external(ctx); ++ ++ if (s->ctx && bs && !dinfo) { + virtio_scsi_acquire(s); +- blk_set_aio_context(sd->conf.blk, qemu_get_aio_context()); ++ bdrv_set_aio_context(bs, qemu_get_aio_context()); + virtio_scsi_release(s); + } +- +- qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); + } + + static struct SCSIBusInfo virtio_scsi_scsi_info = { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-scsi-restart-DMA-after-iothread.patch b/SOURCES/kvm-virtio-scsi-restart-DMA-after-iothread.patch new file mode 100644 index 0000000..4e36ced --- /dev/null +++ b/SOURCES/kvm-virtio-scsi-restart-DMA-after-iothread.patch @@ -0,0 +1,71 @@ +From 64aa185d19f4e4afacd2501831049d6e615b5a84 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 16 Jul 2019 13:22:15 +0200 +Subject: [PATCH 22/23] virtio-scsi: restart DMA after iothread + +RH-Author: Stefan Hajnoczi +Message-id: <20190716132215.18503-4-stefanha@redhat.com> +Patchwork-id: 89535 +O-Subject: [RHEL-7.8 RHEL-7.7.z qemu-kvm-rhev PATCH 3/3] virtio-scsi: restart DMA after iothread +Bugzilla: 1673546 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: John Snow +RH-Acked-by: Kevin Wolf + +When the 'cont' command resumes guest execution the vm change state +handlers are invoked. Unfortunately there is no explicit ordering +between classic qemu_add_vm_change_state_handler() callbacks. When two +layers of code both use vm change state handlers, we don't control which +handler runs first. + +virtio-scsi with iothreads hits a deadlock when a failed SCSI command is +restarted and completes before the iothread is re-initialized. + +This patch uses the new qdev_add_vm_change_state_handler() API to +guarantee that virtio-scsi's virtio change state handler executes before +the SCSI bus children. This way DMA is restarted after the iothread has +re-initialized. 
+ +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit 1a8c091c4ea5db3126514e3f7df678c9ee328802) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Miroslav Rezanina +--- + hw/scsi/scsi-bus.c | 4 ++-- + hw/virtio/virtio.c | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index 5905f6b..ee4c449 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -206,8 +206,8 @@ static void scsi_qdev_realize(DeviceState *qdev, Error **errp) + error_propagate(errp, local_err); + return; + } +- dev->vmsentry = qemu_add_vm_change_state_handler(scsi_dma_restart_cb, +- dev); ++ dev->vmsentry = qdev_add_vm_change_state_handler(DEVICE(dev), ++ scsi_dma_restart_cb, dev); + } + + static void scsi_qdev_unrealize(DeviceState *qdev, Error **errp) +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 3492b20..08a4332 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2306,8 +2306,8 @@ void virtio_init(VirtIODevice *vdev, const char *name, + } else { + vdev->config = NULL; + } +- vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, +- vdev); ++ vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), ++ virtio_vmstate_change, vdev); + vdev->device_endian = virtio_default_endian(); + vdev->use_guest_notifier_mask = true; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vl-add-qemu_add_vm_change_state_handler_prio.patch b/SOURCES/kvm-vl-add-qemu_add_vm_change_state_handler_prio.patch new file mode 100644 index 0000000..df0ca05 --- /dev/null +++ b/SOURCES/kvm-vl-add-qemu_add_vm_change_state_handler_prio.patch @@ -0,0 +1,148 @@ +From f32ee7f16206334c90d2c92517617c08f436ca97 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 16 Jul 2019 13:22:13 +0200 +Subject: [PATCH 20/23] vl: add qemu_add_vm_change_state_handler_prio() + +RH-Author: Stefan Hajnoczi +Message-id: <20190716132215.18503-2-stefanha@redhat.com> +Patchwork-id: 89536 +O-Subject: 
[RHEL-7.8 RHEL-7.7.z qemu-kvm-rhev PATCH 1/3] vl: add qemu_add_vm_change_state_handler_prio() +Bugzilla: 1673546 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: John Snow +RH-Acked-by: Kevin Wolf + +Add an API for registering vm change state handlers with a well-defined +ordering. This is necessary when handlers depend on each other. + +Small coding style fixes are included to make checkpatch.pl happy. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit 60dbc5a1c5176269669ffc26c081ab2cfb7f12f7) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Miroslav Rezanina + +Conflicts: + vl.c + The QTAILQ macros require an explicit name for the head type. +--- + include/sysemu/sysemu.h | 2 ++ + vl.c | 61 +++++++++++++++++++++++++++++++++++++++---------- + 2 files changed, 51 insertions(+), 12 deletions(-) + +diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h +index 2a6f4a5..723c7a9 100644 +--- a/include/sysemu/sysemu.h ++++ b/include/sysemu/sysemu.h +@@ -29,6 +29,8 @@ typedef void VMChangeStateHandler(void *opaque, int running, RunState state); + + VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, + void *opaque); ++VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( ++ VMChangeStateHandler *cb, void *opaque, int priority); + void qemu_del_vm_change_state_handler(VMChangeStateEntry *e); + void vm_state_notify(int running, RunState state); + +diff --git a/vl.c b/vl.c +index 5b337e1..6529d68 100644 +--- a/vl.c ++++ b/vl.c +@@ -1658,28 +1658,58 @@ static int machine_help_func(QemuOpts *opts, MachineState *machine) + struct vm_change_state_entry { + VMChangeStateHandler *cb; + void *opaque; +- QLIST_ENTRY (vm_change_state_entry) entries; ++ QTAILQ_ENTRY(vm_change_state_entry) entries; ++ int priority; + }; + +-static QLIST_HEAD(vm_change_state_head, vm_change_state_entry) vm_change_state_head; ++static QTAILQ_HEAD(VMChangeStateHead, ++ vm_change_state_entry) vm_change_state_head; + +-VMChangeStateEntry 
*qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, +- void *opaque) ++/** ++ * qemu_add_vm_change_state_handler_prio: ++ * @cb: the callback to invoke ++ * @opaque: user data passed to the callback ++ * @priority: low priorities execute first when the vm runs and the reverse is ++ * true when the vm stops ++ * ++ * Register a callback function that is invoked when the vm starts or stops ++ * running. ++ * ++ * Returns: an entry to be freed using qemu_del_vm_change_state_handler() ++ */ ++VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( ++ VMChangeStateHandler *cb, void *opaque, int priority) + { + VMChangeStateEntry *e; ++ VMChangeStateEntry *other; + +- e = g_malloc0(sizeof (*e)); +- ++ e = g_malloc0(sizeof(*e)); + e->cb = cb; + e->opaque = opaque; +- QLIST_INSERT_HEAD(&vm_change_state_head, e, entries); ++ e->priority = priority; ++ ++ /* Keep list sorted in ascending priority order */ ++ QTAILQ_FOREACH(other, &vm_change_state_head, entries) { ++ if (priority < other->priority) { ++ QTAILQ_INSERT_BEFORE(other, e, entries); ++ return e; ++ } ++ } ++ ++ QTAILQ_INSERT_TAIL(&vm_change_state_head, e, entries); + return e; + } + ++VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, ++ void *opaque) ++{ ++ return qemu_add_vm_change_state_handler_prio(cb, opaque, 0); ++} ++ + void qemu_del_vm_change_state_handler(VMChangeStateEntry *e) + { +- QLIST_REMOVE (e, entries); +- g_free (e); ++ QTAILQ_REMOVE(&vm_change_state_head, e, entries); ++ g_free(e); + } + + void vm_state_notify(int running, RunState state) +@@ -1688,8 +1718,15 @@ void vm_state_notify(int running, RunState state) + + trace_vm_state_notify(running, state); + +- QLIST_FOREACH_SAFE(e, &vm_change_state_head, entries, next) { +- e->cb(e->opaque, running, state); ++ if (running) { ++ QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) { ++ e->cb(e->opaque, running, state); ++ } ++ } else { ++ QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, ++ 
VMChangeStateHead, entries, next) { ++ e->cb(e->opaque, running, state); ++ } + } + } + +@@ -3194,7 +3231,7 @@ int main(int argc, char **argv, char **envp) + } + rtc_clock = QEMU_CLOCK_HOST; + +- QLIST_INIT (&vm_change_state_head); ++ QTAILQ_INIT(&vm_change_state_head); + os_setup_early_signal_handling(); + + cpu_model = NULL; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-x86-Data-structure-changes-to-support-MSR-based-feat.patch b/SOURCES/kvm-x86-Data-structure-changes-to-support-MSR-based-feat.patch new file mode 100644 index 0000000..e903957 --- /dev/null +++ b/SOURCES/kvm-x86-Data-structure-changes-to-support-MSR-based-feat.patch @@ -0,0 +1,501 @@ +From d3ceeb5294b3dfec6fc86cc1111f12923b62e50c Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Tue, 4 Jun 2019 21:47:23 +0200 +Subject: [PATCH 04/23] x86: Data structure changes to support MSR based + features + +RH-Author: plai@redhat.com +Message-id: <1559684847-10889-5-git-send-email-plai@redhat.com> +Patchwork-id: 88535 +O-Subject: [RHEL7.7 qemu-kvm-rhev PATCH v4 4/8] x86: Data structure changes to support MSR based features +Bugzilla: 1709972 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Miroslav Rezanina + +From: Robert Hoo + +Add FeatureWordType indicator in struct FeatureWordInfo. +Change feature_word_info[] accordingly. +Change existing functions that refer to feature_word_info[] accordingly. 
+ +Signed-off-by: Robert Hoo +Message-Id: <1539578845-37944-3-git-send-email-robert.hu@linux.intel.com> +[ehabkost: fixed hvf_enabled() case] +Signed-off-by: Eduardo Habkost +(cherry picked from commit 07585923485952bf4cb7da563c9f91fecc85d09c) +Signed-off-by: Paul Lai + +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 197 +++++++++++++++++++++++++++++++++++++++--------------- + 1 file changed, 142 insertions(+), 55 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 4c7364b..3a06d37 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -773,17 +773,36 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, + /* missing: + CPUID_XSAVE_XSAVEC, CPUID_XSAVE_XSAVES */ + ++typedef enum FeatureWordType { ++ CPUID_FEATURE_WORD, ++ MSR_FEATURE_WORD, ++} FeatureWordType; ++ + typedef struct FeatureWordInfo { ++ FeatureWordType type; + /* feature flags names are taken from "Intel Processor Identification and + * the CPUID Instruction" and AMD's "CPUID Specification". + * In cases of disagreement between feature naming conventions, + * aliases may be added. 
+ */ + const char *feat_names[32]; +- uint32_t cpuid_eax; /* Input EAX for CPUID */ +- bool cpuid_needs_ecx; /* CPUID instruction uses ECX as input */ +- uint32_t cpuid_ecx; /* Input ECX value for CPUID */ +- int cpuid_reg; /* output register (R_* constant) */ ++ union { ++ /* If type==CPUID_FEATURE_WORD */ ++ struct { ++ uint32_t eax; /* Input EAX for CPUID */ ++ bool needs_ecx; /* CPUID instruction uses ECX as input */ ++ uint32_t ecx; /* Input ECX value for CPUID */ ++ int reg; /* output register (R_* constant) */ ++ } cpuid; ++ /* If type==MSR_FEATURE_WORD */ ++ struct { ++ uint32_t index; ++ struct { /*CPUID that enumerate this MSR*/ ++ FeatureWord cpuid_class; ++ uint32_t cpuid_flag; ++ } cpuid_dep; ++ } msr; ++ }; + uint32_t tcg_features; /* Feature flags supported by TCG */ + uint32_t unmigratable_flags; /* Feature flags known to be unmigratable */ + uint32_t migratable_flags; /* Feature flags known to be migratable */ +@@ -793,6 +812,7 @@ typedef struct FeatureWordInfo { + + static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + [FEAT_1_EDX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + "fpu", "vme", "de", "pse", + "tsc", "msr", "pae", "mce", +@@ -803,10 +823,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "fxsr", "sse", "sse2", "ss", + "ht" /* Intel htt */, "tm", "ia64", "pbe", + }, +- .cpuid_eax = 1, .cpuid_reg = R_EDX, ++ .cpuid = {.eax = 1, .reg = R_EDX, }, + .tcg_features = TCG_FEATURES, + }, + [FEAT_1_ECX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + "pni" /* Intel,AMD sse3 */, "pclmulqdq", "dtes64", "monitor", + "ds-cpl", "vmx", "smx", "est", +@@ -817,7 +838,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "tsc-deadline", "aes", "xsave", "osxsave", + "avx", "f16c", "rdrand", "hypervisor", + }, +- .cpuid_eax = 1, .cpuid_reg = R_ECX, ++ .cpuid = { .eax = 1, .reg = R_ECX, }, + .tcg_features = TCG_EXT_FEATURES, + }, + /* Feature names that are already defined on feature_name[] but +@@ 
-826,6 +847,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + * to features[FEAT_8000_0001_EDX] if and only if CPU vendor is AMD. + */ + [FEAT_8000_0001_EDX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL /* fpu */, NULL /* vme */, NULL /* de */, NULL /* pse */, + NULL /* tsc */, NULL /* msr */, NULL /* pae */, NULL /* mce */, +@@ -836,10 +858,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL /* fxsr */, "fxsr-opt", "pdpe1gb", "rdtscp", + NULL, "lm", "3dnowext", "3dnow", + }, +- .cpuid_eax = 0x80000001, .cpuid_reg = R_EDX, ++ .cpuid = { .eax = 0x80000001, .reg = R_EDX, }, + .tcg_features = TCG_EXT2_FEATURES, + }, + [FEAT_8000_0001_ECX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + "lahf-lm", "cmp-legacy", "svm", "extapic", + "cr8legacy", "abm", "sse4a", "misalignsse", +@@ -850,7 +873,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "perfctr-nb", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = 0x80000001, .cpuid_reg = R_ECX, ++ .cpuid = { .eax = 0x80000001, .reg = R_ECX, }, + .tcg_features = TCG_EXT3_FEATURES, + /* + * TOPOEXT is always allowed but can't be enabled blindly by +@@ -860,6 +883,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .no_autoenable_flags = CPUID_EXT3_TOPOEXT, + }, + [FEAT_C000_0001_EDX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, "xstore", "xstore-en", + NULL, NULL, "xcrypt", "xcrypt-en", +@@ -870,10 +894,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = 0xC0000001, .cpuid_reg = R_EDX, ++ .cpuid = { .eax = 0xC0000001, .reg = R_EDX, }, + .tcg_features = TCG_EXT4_FEATURES, + }, + [FEAT_KVM] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + "kvmclock", "kvm-nopiodelay", "kvm-mmu", "kvmclock", + "kvm-asyncpf", "kvm-steal-time", "kvm-pv-eoi", "kvm-pv-unhalt", +@@ -884,10 +909,11 @@ static FeatureWordInfo 
feature_word_info[FEATURE_WORDS] = { + "kvmclock-stable-bit", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = KVM_CPUID_FEATURES, .cpuid_reg = R_EAX, ++ .cpuid = { .eax = KVM_CPUID_FEATURES, .reg = R_EAX, }, + .tcg_features = TCG_KVM_FEATURES, + }, + [FEAT_KVM_HINTS] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + "kvm-hint-dedicated", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +@@ -898,7 +924,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = KVM_CPUID_FEATURES, .cpuid_reg = R_EDX, ++ .cpuid = { .eax = KVM_CPUID_FEATURES, .reg = R_EDX, }, + .tcg_features = TCG_KVM_FEATURES, + /* + * KVM hints aren't auto-enabled by -cpu host, they need to be +@@ -907,6 +933,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .no_autoenable_flags = ~0U, + }, + [FEAT_HYPERV_EAX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL /* hv_msr_vp_runtime_access */, NULL /* hv_msr_time_refcount_access */, + NULL /* hv_msr_synic_access */, NULL /* hv_msr_stimer_access */, +@@ -920,9 +947,10 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = 0x40000003, .cpuid_reg = R_EAX, ++ .cpuid = { .eax = 0x40000003, .reg = R_EAX, }, + }, + [FEAT_HYPERV_EBX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL /* hv_create_partitions */, NULL /* hv_access_partition_id */, + NULL /* hv_access_memory_pool */, NULL /* hv_adjust_message_buffers */, +@@ -936,9 +964,10 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = 0x40000003, .cpuid_reg = R_EBX, ++ .cpuid = { .eax = 0x40000003, .reg = R_EBX, }, + }, + [FEAT_HYPERV_EDX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL /* hv_mwait */, NULL /* hv_guest_debugging */, + NULL /* hv_perf_monitor */, NULL /* hv_cpu_dynamic_part */, +@@ -951,9 
+980,10 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = 0x40000003, .cpuid_reg = R_EDX, ++ .cpuid = { .eax = 0x40000003, .reg = R_EDX, }, + }, + [FEAT_SVM] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + "npt", "lbrv", "svm-lock", "nrip-save", + "tsc-scale", "vmcb-clean", "flushbyasid", "decodeassists", +@@ -964,10 +994,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = 0x8000000A, .cpuid_reg = R_EDX, ++ .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, + .tcg_features = TCG_SVM_FEATURES, + }, + [FEAT_7_0_EBX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + "fsgsbase", "tsc-adjust", NULL, "bmi1", + "hle", "avx2", NULL, "smep", +@@ -978,12 +1009,15 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "clwb", "intel-pt", "avx512pf", "avx512er", + "avx512cd", "sha-ni", "avx512bw", "avx512vl", + }, +- .cpuid_eax = 7, +- .cpuid_needs_ecx = true, .cpuid_ecx = 0, +- .cpuid_reg = R_EBX, ++ .cpuid = { ++ .eax = 7, ++ .needs_ecx = true, .ecx = 0, ++ .reg = R_EBX, ++ }, + .tcg_features = TCG_7_0_EBX_FEATURES, + }, + [FEAT_7_0_ECX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, "avx512vbmi", "umip", "pku", + "ospke", NULL, "avx512vbmi2", NULL, +@@ -994,12 +1028,15 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, "cldemote", NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = 7, +- .cpuid_needs_ecx = true, .cpuid_ecx = 0, +- .cpuid_reg = R_ECX, ++ .cpuid = { ++ .eax = 7, ++ .needs_ecx = true, .ecx = 0, ++ .reg = R_ECX, ++ }, + .tcg_features = TCG_7_0_ECX_FEATURES, + }, + [FEAT_7_0_EDX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", + NULL, NULL, NULL, NULL, +@@ -1010,13 +1047,16 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, "spec-ctrl", "stibp", + NULL, 
"arch-capabilities", NULL, "ssbd", + }, +- .cpuid_eax = 7, +- .cpuid_needs_ecx = true, .cpuid_ecx = 0, +- .cpuid_reg = R_EDX, ++ .cpuid = { ++ .eax = 7, ++ .needs_ecx = true, .ecx = 0, ++ .reg = R_EDX, ++ }, + .tcg_features = TCG_7_0_EDX_FEATURES, + .unmigratable_flags = CPUID_7_0_EDX_ARCH_CAPABILITIES, + }, + [FEAT_8000_0007_EDX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +@@ -1027,12 +1067,12 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = 0x80000007, +- .cpuid_reg = R_EDX, ++ .cpuid = { .eax = 0x80000007, .reg = R_EDX, }, + .tcg_features = TCG_APM_FEATURES, + .unmigratable_flags = CPUID_APM_INVTSC, + }, + [FEAT_8000_0008_EBX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +@@ -1043,12 +1083,12 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, "virt-ssbd", NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = 0x80000008, +- .cpuid_reg = R_EBX, ++ .cpuid = { .eax = 0x80000008, .reg = R_EBX, }, + .tcg_features = 0, + .unmigratable_flags = 0, + }, + [FEAT_XSAVE] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + "xsaveopt", "xsavec", "xgetbv1", "xsaves", + NULL, NULL, NULL, NULL, +@@ -1059,12 +1099,15 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = 0xd, +- .cpuid_needs_ecx = true, .cpuid_ecx = 1, +- .cpuid_reg = R_EAX, ++ .cpuid = { ++ .eax = 0xd, ++ .needs_ecx = true, .ecx = 1, ++ .reg = R_EAX, ++ }, + .tcg_features = TCG_XSAVE_FEATURES, + }, + [FEAT_6_EAX] = { ++ .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, "arat", NULL, + NULL, NULL, NULL, NULL, +@@ -1075,13 +1118,16 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, +- .cpuid_eax = 6, .cpuid_reg = R_EAX, ++ 
.cpuid = { .eax = 6, .reg = R_EAX, }, + .tcg_features = TCG_6_EAX_FEATURES, + }, + [FEAT_XSAVE_COMP_LO] = { +- .cpuid_eax = 0xD, +- .cpuid_needs_ecx = true, .cpuid_ecx = 0, +- .cpuid_reg = R_EAX, ++ .type = CPUID_FEATURE_WORD, ++ .cpuid = { ++ .eax = 0xD, ++ .needs_ecx = true, .ecx = 0, ++ .reg = R_EAX, ++ }, + .tcg_features = ~0U, + .migratable_flags = XSTATE_FP_MASK | XSTATE_SSE_MASK | + XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK | +@@ -1089,9 +1135,12 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + XSTATE_PKRU_MASK, + }, + [FEAT_XSAVE_COMP_HI] = { +- .cpuid_eax = 0xD, +- .cpuid_needs_ecx = true, .cpuid_ecx = 0, +- .cpuid_reg = R_EDX, ++ .type = CPUID_FEATURE_WORD, ++ .cpuid = { ++ .eax = 0xD, ++ .needs_ecx = true, .ecx = 0, ++ .reg = R_EDX, ++ }, + .tcg_features = ~0U, + }, + }; +@@ -2846,21 +2895,41 @@ static const TypeInfo host_x86_cpu_type_info = { + + #endif + ++static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) ++{ ++ assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD); ++ ++ switch (f->type) { ++ case CPUID_FEATURE_WORD: ++ { ++ const char *reg = get_register_name_32(f->cpuid.reg); ++ assert(reg); ++ return g_strdup_printf("CPUID.%02XH:%s", ++ f->cpuid.eax, reg); ++ } ++ case MSR_FEATURE_WORD: ++ return g_strdup_printf("MSR(%02XH)", ++ f->msr.index); ++ } ++ ++ return NULL; ++} ++ + static void report_unavailable_features(FeatureWord w, uint32_t mask) + { + FeatureWordInfo *f = &feature_word_info[w]; + int i; ++ char *feat_word_str; + + for (i = 0; i < 32; ++i) { + if ((1UL << i) & mask) { +- const char *reg = get_register_name_32(f->cpuid_reg); +- assert(reg); +- warn_report("%s doesn't support requested feature: " +- "CPUID.%02XH:%s%s%s [bit %d]", ++ feat_word_str = feature_word_description(f, i); ++ warn_report("%s doesn't support requested feature: %s%s%s [bit %d]", + accel_uses_host_cpuid() ? "host" : "TCG", +- f->cpuid_eax, reg, ++ feat_word_str, + f->feat_names[i] ? "." 
: "", + f->feat_names[i] ? f->feat_names[i] : "", i); ++ g_free(feat_word_str); + } + } + } +@@ -3104,11 +3173,18 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, + + for (w = 0; w < FEATURE_WORDS; w++) { + FeatureWordInfo *wi = &feature_word_info[w]; ++ /* ++ * We didn't have MSR features when "feature-words" was ++ * introduced. Therefore skipped other type entries. ++ */ ++ if (wi->type != CPUID_FEATURE_WORD) { ++ continue; ++ } + X86CPUFeatureWordInfo *qwi = &word_infos[w]; +- qwi->cpuid_input_eax = wi->cpuid_eax; +- qwi->has_cpuid_input_ecx = wi->cpuid_needs_ecx; +- qwi->cpuid_input_ecx = wi->cpuid_ecx; +- qwi->cpuid_register = x86_reg_info_32[wi->cpuid_reg].qapi_enum; ++ qwi->cpuid_input_eax = wi->cpuid.eax; ++ qwi->has_cpuid_input_ecx = wi->cpuid.needs_ecx; ++ qwi->cpuid_input_ecx = wi->cpuid.ecx; ++ qwi->cpuid_register = x86_reg_info_32[wi->cpuid.reg].qapi_enum; + qwi->features = array[w]; + + /* List will be in reverse order, but order shouldn't matter */ +@@ -3464,16 +3540,26 @@ static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, + bool migratable_only) + { + FeatureWordInfo *wi = &feature_word_info[w]; +- uint32_t r; ++ uint32_t r = 0; + + if (kvm_enabled()) { +- r = kvm_arch_get_supported_cpuid(kvm_state, wi->cpuid_eax, +- wi->cpuid_ecx, +- wi->cpuid_reg); ++ switch (wi->type) { ++ case CPUID_FEATURE_WORD: ++ r = kvm_arch_get_supported_cpuid(kvm_state, wi->cpuid.eax, ++ wi->cpuid.ecx, ++ wi->cpuid.reg); ++ break; ++ case MSR_FEATURE_WORD: ++ r = kvm_arch_get_supported_msr_feature(kvm_state, wi->msr.index); ++ break; ++ } + } else if (hvf_enabled()) { +- r = hvf_get_supported_cpuid(wi->cpuid_eax, +- wi->cpuid_ecx, +- wi->cpuid_reg); ++ if (wi->type != CPUID_FEATURE_WORD) { ++ return 0; ++ } ++ r = hvf_get_supported_cpuid(wi->cpuid.eax, ++ wi->cpuid.ecx, ++ wi->cpuid.reg); + } else if (tcg_enabled()) { + r = wi->tcg_features; + } else { +@@ -4534,9 +4620,10 @@ static void x86_cpu_adjust_feat_level(X86CPU *cpu, FeatureWord w) 
+ { + CPUX86State *env = &cpu->env; + FeatureWordInfo *fi = &feature_word_info[w]; +- uint32_t eax = fi->cpuid_eax; ++ uint32_t eax = fi->cpuid.eax; + uint32_t region = eax & 0xF0000000; + ++ assert(feature_word_info[w].type == CPUID_FEATURE_WORD); + if (!env->features[w]) { + return; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-x86-define-a-new-MSR-based-feature-word-FEATURE_WORD.patch b/SOURCES/kvm-x86-define-a-new-MSR-based-feature-word-FEATURE_WORD.patch new file mode 100644 index 0000000..3d952c6 --- /dev/null +++ b/SOURCES/kvm-x86-define-a-new-MSR-based-feature-word-FEATURE_WORD.patch @@ -0,0 +1,128 @@ +From fbad7e91a2cb3a3610f1013f63d39473ab165b5b Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Tue, 4 Jun 2019 21:47:24 +0200 +Subject: [PATCH 05/23] x86: define a new MSR based feature word -- + FEATURE_WORDS_ARCH_CAPABILITIES + +RH-Author: plai@redhat.com +Message-id: <1559684847-10889-6-git-send-email-plai@redhat.com> +Patchwork-id: 88534 +O-Subject: [RHEL7.7 qemu-kvm-rhev PATCH v4 5/8] x86: define a new MSR based feature word -- FEATURE_WORDS_ARCH_CAPABILITIES +Bugzilla: 1709972 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Miroslav Rezanina + +From: Robert Hoo + +Note RSBA is specially treated -- no matter host support it or not, qemu +pretends it is supported. 
+ +Signed-off-by: Robert Hoo +Message-Id: <1539578845-37944-4-git-send-email-robert.hu@linux.intel.com> +[ehabkost: removed automatic enabling of RSBA] +Reviewed-by: Eduardo Habkost +Signed-off-by: Eduardo Habkost +(cherry picked from commit d86f963694df27f11b3681ffd225c9362de1b634) +Signed-off-by: Paul Lai + +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 24 +++++++++++++++++++++++- + target/i386/cpu.h | 8 ++++++++ + target/i386/kvm.c | 11 +++++++++++ + 3 files changed, 42 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 3a06d37..478c5a4 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1143,6 +1143,27 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + }, + .tcg_features = ~0U, + }, ++ /*Below are MSR exposed features*/ ++ [FEAT_ARCH_CAPABILITIES] = { ++ .type = MSR_FEATURE_WORD, ++ .feat_names = { ++ "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", ++ "ssb-no", NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ }, ++ .msr = { ++ .index = MSR_IA32_ARCH_CAPABILITIES, ++ .cpuid_dep = { ++ FEAT_7_0_EDX, ++ CPUID_7_0_EDX_ARCH_CAPABILITIES ++ } ++ }, ++ }, + }; + + typedef struct X86RegisterInfo32 { +@@ -3550,7 +3571,8 @@ static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, + wi->cpuid.reg); + break; + case MSR_FEATURE_WORD: +- r = kvm_arch_get_supported_msr_feature(kvm_state, wi->msr.index); ++ r = kvm_arch_get_supported_msr_feature(kvm_state, ++ wi->msr.index); + break; + } + } else if (hvf_enabled()) { +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index eb39724..8ab313e 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -501,6 +501,7 @@ typedef enum FeatureWord { + FEAT_6_EAX, /* CPUID[6].EAX */ + FEAT_XSAVE_COMP_LO, /* CPUID[EAX=0xd,ECX=0].EAX */ + FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ ++ 
FEAT_ARCH_CAPABILITIES, + FEATURE_WORDS, + } FeatureWord; + +@@ -728,6 +729,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_TOPOLOGY_LEVEL_SMT (1U << 8) + #define CPUID_TOPOLOGY_LEVEL_CORE (2U << 8) + ++/* MSR Feature Bits */ ++#define MSR_ARCH_CAP_RDCL_NO (1U << 0) ++#define MSR_ARCH_CAP_IBRS_ALL (1U << 1) ++#define MSR_ARCH_CAP_RSBA (1U << 2) ++#define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3) ++#define MSR_ARCH_CAP_SSB_NO (1U << 4) ++ + #ifndef HYPERV_SPINLOCK_NEVER_RETRY + #define HYPERV_SPINLOCK_NEVER_RETRY 0xFFFFFFFF + #endif +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 0ecec4a..88a4114 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -1833,6 +1833,17 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + } + #endif + ++ /* If host supports feature MSR, write down. */ ++ if (kvm_feature_msrs) { ++ int i; ++ for (i = 0; i < kvm_feature_msrs->nmsrs; i++) ++ if (kvm_feature_msrs->indices[i] == MSR_IA32_ARCH_CAPABILITIES) { ++ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, ++ env->features[FEAT_ARCH_CAPABILITIES]); ++ break; ++ } ++ } ++ + /* + * The following MSRs have side effects on the guest or are too heavy + * for normal writeback. Limit them to reset or full state updates. +-- +1.8.3.1 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index 50f62ae..06ad481 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -108,7 +108,7 @@ Obsoletes: %1%{rhel_ma_suffix} < %{obsoletes_version2} \ Summary: QEMU is a machine emulator and virtualizer Name: %{pkgname}%{?pkgsuffix} Version: 2.12.0 -Release: 33%{?dist}.1 +Release: 44%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 10 License: GPLv2 and GPLv2+ and CC-BY @@ -1854,8 +1854,108 @@ Patch848: kvm-blockdev-fix-missed-target-unref-for-drive-backup.patch Patch849: kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch # For bz#1608226 - [virtual-network][mq] prompt warning "qemu-kvm: unable to start vhost net: 14: falling back on userspace virtio" when boot with win8+ guests with multi-queue Patch850: kvm-vhost_net-don-t-set-backend-for-the-uninitialized-vi.patch -# For bz#1735652 - CVE-2019-14378 qemu-kvm-ma: QEMU: slirp: heap buffer overflow during packet reassembly [rhel-7.7.z] +# For bz#1734753 - CVE-2019-14378 qemu-kvm-rhev: QEMU: slirp: heap buffer overflow during packet reassembly [rhel-7.8] +# For bz#1735653 - CVE-2019-14378 qemu-kvm-ma: QEMU: slirp: heap buffer overflow during packet reassembly [rhel-7.8] Patch851: kvm-Fix-heap-overflow-in-ip_reass-on-big-packet-input.patch +# For bz#1709972 - [Intel 7.8 Bug] [KVM][CLX] CPUID_7_0_EDX_ARCH_CAPABILITIES is not enabled in VM qemu-kvm-rhev +Patch852: kvm-i386-Add-new-MSR-indices-for-IA32_PRED_CMD-and-IA32_.patch +# For bz#1709972 - [Intel 7.8 Bug] [KVM][CLX] CPUID_7_0_EDX_ARCH_CAPABILITIES is not enabled in VM qemu-kvm-rhev +Patch853: kvm-i386-Add-CPUID-bit-and-feature-words-for-IA32_ARCH_C.patch +# For bz#1709972 - [Intel 7.8 Bug] [KVM][CLX] CPUID_7_0_EDX_ARCH_CAPABILITIES is not enabled in VM qemu-kvm-rhev +Patch854: kvm-Add-support-to-KVM_GET_MSR_FEATURE_INDEX_LIST-an.patch +# For bz#1709972 - [Intel 7.8 Bug] [KVM][CLX] CPUID_7_0_EDX_ARCH_CAPABILITIES is not enabled in VM qemu-kvm-rhev +Patch855: kvm-x86-Data-structure-changes-to-support-MSR-based-feat.patch +# For bz#1709972 - [Intel 7.8 Bug] [KVM][CLX] CPUID_7_0_EDX_ARCH_CAPABILITIES is not enabled in VM qemu-kvm-rhev +Patch856: kvm-x86-define-a-new-MSR-based-feature-word-FEATURE_WORD.patch +# For bz#1709972 - [Intel 7.8 Bug] [KVM][CLX] CPUID_7_0_EDX_ARCH_CAPABILITIES is not enabled in VM qemu-kvm-rhev +Patch857: 
kvm-Use-KVM_GET_MSR_INDEX_LIST-for-MSR_IA32_ARCH_CAP.patch +# For bz#1709972 - [Intel 7.8 Bug] [KVM][CLX] CPUID_7_0_EDX_ARCH_CAPABILITIES is not enabled in VM qemu-kvm-rhev +Patch858: kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch +# For bz#1709972 - [Intel 7.8 Bug] [KVM][CLX] CPUID_7_0_EDX_ARCH_CAPABILITIES is not enabled in VM qemu-kvm-rhev +Patch859: kvm-i386-Make-arch_capabilities-migratable.patch +# For bz#1648622 - [v2v] Migration performance regression +Patch860: kvm-block-Remove-error-messages-in-bdrv_make_zero.patch +# For bz#1648622 - [v2v] Migration performance regression +Patch861: kvm-block-Add-BDRV_REQ_NO_FALLBACK.patch +# For bz#1648622 - [v2v] Migration performance regression +Patch862: kvm-block-Advertise-BDRV_REQ_NO_FALLBACK-in-filter-drive.patch +# For bz#1648622 - [v2v] Migration performance regression +Patch863: kvm-file-posix-Fix-write_zeroes-with-unmap-on-block-devi.patch +# For bz#1648622 - [v2v] Migration performance regression +Patch864: kvm-file-posix-Factor-out-raw_thread_pool_submit.patch +# For bz#1648622 - [v2v] Migration performance regression +Patch865: kvm-file-posix-Avoid-aio_worker-for-QEMU_AIO_WRITE_ZEROE.patch +# For bz#1648622 - [v2v] Migration performance regression +Patch866: kvm-file-posix-Support-BDRV_REQ_NO_FALLBACK-for-zero-wri.patch +# For bz#1648622 - [v2v] Migration performance regression +Patch867: kvm-qemu-img-Use-BDRV_REQ_NO_FALLBACK-for-pre-zeroing.patch +# For bz#1648622 - [v2v] Migration performance regression +Patch868: kvm-qemu-io-Add-write-n-for-BDRV_REQ_NO_FALLBACK.patch +# For bz#1712704 - CVE-2019-12155 qemu-kvm-rhev: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-7] +Patch869: kvm-qxl-check-release-info-object.patch +# For bz#1721522 - ccid: Fix incorrect dwProtocol advertisement of T=0 +Patch870: kvm-ccid-Fix-dwProtocols-advertisement-of-T-0.patch +# For bz#1673546 - QEMU gets stuck on resume/cont call from libvirt +Patch871: 
kvm-vl-add-qemu_add_vm_change_state_handler_prio.patch +# For bz#1673546 - QEMU gets stuck on resume/cont call from libvirt +Patch872: kvm-qdev-add-qdev_add_vm_change_state_handler.patch +# For bz#1673546 - QEMU gets stuck on resume/cont call from libvirt +Patch873: kvm-virtio-scsi-restart-DMA-after-iothread.patch +# For bz#1711643 - qemu aborts in blockCommit: qemu-kvm: block.c:3486: bdrv_replace_node: Assertion `!({ _Static_assert(!(sizeof(*&from->in_flight) > 8), "not expecting: " "sizeof(*&from->in_flight) > ATOMIC_REG_SIZE"); __atomic_load_n(&from->in_flight, 0); })' failed. +Patch874: kvm-block-Drain-source-node-in-bdrv_replace_node.patch +# For bz#1749723 - CVE-2019-15890 qemu-kvm-ma: QEMU: Slirp: use-after-free during packet reassembly [rhel-7] +Patch875: kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch +# For bz#1746224 - qemu coredump: qemu-kvm: block/create.c:68: qmp_blockdev_create: Assertion `drv' failed +Patch876: kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch +# For bz#1743508 - ISST-LTE:RHV4.3 on RHEL7.6 kvm host:Power8:Tuleta-L:lotg7: call traces dumped on guest while performing guest migration (qemu-kvm-rhev) +Patch877: kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch +# For bz#1665256 - Live storage migration fails with: TimeoutError: Timed out during operation: cannot acquire state change lock (held by monitor=remoteDispatchConnectGetAllDomainStats) and the VM becomes 'Not Responding' +Patch878: kvm-mirror-Confirm-we-re-quiesced-only-if-the-job-is-pau.patch +# For bz#1734502 - qemu-kvm: backport cpuidle-haltpoll support +Patch879: kvm-i386-halt-poll-control-MSR-support.patch +# For bz#1716726 - [Intel 7.8 FEAT] MDS_NO exposure to guest - qemu-kvm-rhev +Patch880: kvm-target-i386-add-MDS-NO-feature.patch +# For bz#1743365 - qemu, qemu-img fail to detect alignment with XFS and Gluster/XFS on 4k block device +Patch881: kvm-file-posix-Handle-undetectable-alignment.patch +# For bz#1648622 - [v2v] Migration 
performance regression +Patch882: kvm-qemu-img-Enable-BDRV_REQ_MAY_UNMAP-in-convert.patch +# For bz#1724048 - Fail to migrate a rhel6.10-mt7.6 guest with dimm device +Patch883: kvm-usb-drop-unnecessary-usb_device_post_load-checks.patch +# For bz#1638472 - [Intel 7.8 Feat] qemu-kvm-rhev Introduce Cascade Lake (CLX) cpu model +Patch884: kvm-i386-Add-new-model-of-Cascadelake-Server.patch +# For bz#1638472 - [Intel 7.8 Feat] qemu-kvm-rhev Introduce Cascade Lake (CLX) cpu model +Patch885: kvm-i386-Disable-OSPKE-on-Cascadelake-Server.patch +# For bz#1638472 - [Intel 7.8 Feat] qemu-kvm-rhev Introduce Cascade Lake (CLX) cpu model +Patch886: kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-Cascadelake-.patch +# For bz#1764120 - [Data plane]virtio_scsi_ctx_check: Assertion `blk_get_aio_context(d->conf.blk) == s->ctx' failed when unplug a device that running block stream on it +Patch887: kvm-virtio-scsi-fixed-virtio_scsi_ctx_check-failed-when-.patch +# For bz#1775251 - qemu-kvm crashes when Windows VM is migrated with multiqueue +Patch888: kvm-vhost-fix-vhost_log-size-overflow-during-migration.patch +# For bz#1639098 - After host update, older windows clients have large time drift +Patch889: kvm-mc146818rtc-fix-timer-interrupt-reinjection.patch +# For bz#1639098 - After host update, older windows clients have large time drift +Patch890: kvm-Revert-mc146818rtc-fix-timer-interrupt-reinjection.patch +# For bz#1639098 - After host update, older windows clients have large time drift +Patch891: kvm-mc146818rtc-fix-timer-interrupt-reinjection-again.patch +# For bz#1779530 - CVE-2019-11135 qemu-kvm-rhev: hw: TSX Transaction Asynchronous Abort (TAA) [rhel-7.8] +Patch892: kvm-target-i386-Export-TAA_NO-bit-to-guests.patch +# For bz#1779530 - CVE-2019-11135 qemu-kvm-rhev: hw: TSX Transaction Asynchronous Abort (TAA) [rhel-7.8] +Patch893: kvm-target-i386-add-support-for-MSR_IA32_TSX_CTRL.patch +# For bz#1791563 - CVE-2020-7039 qemu-kvm-rhev: QEMU: slirp: OOB buffer access while emulating tcp 
protocols in tcp_emu() [rhel-7.8] +# For bz#1791570 - CVE-2020-7039 qemu-kvm-ma: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-7.8] +Patch894: kvm-tcp_emu-Fix-oob-access.patch +# For bz#1791563 - CVE-2020-7039 qemu-kvm-rhev: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-7.8] +# For bz#1791570 - CVE-2020-7039 qemu-kvm-ma: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-7.8] +Patch895: kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch +# For bz#1791563 - CVE-2020-7039 qemu-kvm-rhev: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-7.8] +# For bz#1791570 - CVE-2020-7039 qemu-kvm-ma: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-7.8] +Patch896: kvm-slirp-use-correct-size-while-emulating-commands.patch +# For bz#1794499 - CVE-2020-1711 qemu-kvm-rhev: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-7.8] +# For bz#1794505 - CVE-2020-1711 qemu-kvm-ma: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-7.8] +Patch897: kvm-iscsi-Avoid-potential-for-get_status-overflow.patch +# For bz#1794499 - CVE-2020-1711 qemu-kvm-rhev: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-7.8] +# For bz#1794505 - CVE-2020-1711 qemu-kvm-ma: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-7.8] +Patch898: kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -2924,6 +3024,53 @@ ApplyOptionalPatch() %patch849 -p1 %patch850 -p1 %patch851 -p1 +%patch852 -p1 +%patch853 -p1 +%patch854 -p1 +%patch855 -p1 +%patch856 -p1 +%patch857 -p1 +%patch858 -p1 +%patch859 -p1 +%patch860 -p1 +%patch861 -p1 +%patch862 -p1 +%patch863 -p1 +%patch864 -p1 +%patch865 -p1 +%patch866 -p1 +%patch867 -p1 +%patch868 -p1 +%patch869 -p1 +%patch870 
-p1 +%patch871 -p1 +%patch872 -p1 +%patch873 -p1 +%patch874 -p1 +%patch875 -p1 +%patch876 -p1 +%patch877 -p1 +%patch878 -p1 +%patch879 -p1 +%patch880 -p1 +%patch881 -p1 +%patch882 -p1 +%patch883 -p1 +%patch884 -p1 +%patch885 -p1 +%patch886 -p1 +%patch887 -p1 +%patch888 -p1 +%patch889 -p1 +%patch890 -p1 +%patch891 -p1 +%patch892 -p1 +%patch893 -p1 +%patch894 -p1 +%patch895 -p1 +%patch896 -p1 +%patch897 -p1 +%patch898 -p1 # Fix executable permission for iotests chmod 755 $(ls tests/qemu-iotests/???) @@ -3271,20 +3418,6 @@ chmod 0644 $RPM_BUILD_ROOT%{_bindir}/qemu-keymap %check export DIFF=diff; make check V=1 -pushd tests/qemu-iotests -%if %{rhev} -./check -v -raw 001 002 003 004 008 009 010 011 012 021 025 032 033 045 048 052 063 077 101 104 106 113 120 132 140 143 145 147 152 157 159 160 162 170 184 194 205 208 218 222 226 227 232 233 236 -./check -v -qcow2 001 002 003 004 005 007 008 009 010 011 012 017 018 019 020 021 022 024 025 027 029 031 032 033 034 035 036 037 038 039 042 043 046 047 048 049 050 052 053 054 056 062 063 065 066 068 069 072 073 074 080 082 085 086 087 089 090 091 095 096 097 098 102 103 104 105 107 108 110 111 114 117 120 126 127 130 132 133 134 137 140 141 142 143 144 145 147 150 152 156 157 158 159 162 165 170 174 177 179 184 187 188 189 190 191 194 195 196 198 202 203 204 205 206 208 209 214 216 217 218 222 223 226 227 232 233 236 242 246 -./check -v -luks 001 002 003 004 009 010 011 012 021 032 033 052 140 143 145 157 162 174 184 208 218 227 236 -./check -v -nbd 001 002 003 004 008 009 011 021 032 045 077 119 123 132 143 145 147 152 162 184 194 205 208 218 222 236 -# qemu-kvm-ma -%else -./check -v -raw 001 002 003 004 008 009 010 011 012 021 025 032 033 045 048 052 077 101 104 106 113 120 140 143 145 147 157 159 160 162 170 184 205 226 227 232 233 236 -./check -v -qcow2 001 002 003 004 005 007 008 009 010 011 012 017 018 019 020 021 022 024 025 027 029 031 032 033 034 035 036 037 038 039 042 043 046 047 048 049 050 051 051 053 054 057 058 
061 062 063 065 066 068 069 072 073 074 082 086 087 089 090 091 097 098 102 103 104 105 107 108 110 111 114 117 120 126 130 133 134 137 140 143 145 147 150 157 158 162 165 170 174 177 179 184 187 188 189 190 195 196 198 202 203 204 205 206 209 214 216 217 223 226 227 232 233 236 242 246 -./check -v -luks 001 002 003 004 008 009 010 011 012 021 032 033 052 140 143 145 157 162 174 184 210 227 236 -./check -v -nbd 001 003 004 008 009 010 021 032 033 045 077 104 119 123 143 145 147 162 184 205 236 -%endif -popd %post # load kvm modules now, so we can make sure no reboot is needed. @@ -3458,10 +3591,123 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog -* Mon Aug 12 2019 Miroslav Rezanina - 2.12.0-33.el7_7.1 -- kvm-Fix-heap-overflow-in-ip_reass-on-big-packet-input.patch [bz#1735652] -- Resolves: bz#1735652 - (CVE-2019-14378 qemu-kvm-ma: QEMU: slirp: heap buffer overflow during packet reassembly [rhel-7.7.z]) +* Wed Feb 05 2020 Miroslav Rezanina - 2.12.0-44.el7 +- kvm-iscsi-Avoid-potential-for-get_status-overflow.patch [bz#1794505] +- kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch [bz#1794505] +- Resolves: bz#1794505 + (CVE-2020-1711 qemu-kvm-ma: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-7.8]) + +* Thu Jan 23 2020 Miroslav Rezanina - 2.12.0-43.el7 +- kvm-tcp_emu-Fix-oob-access.patch [bz#1791563 bz#1791570] +- kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch [bz#1791570] +- kvm-slirp-use-correct-size-while-emulating-commands.patch [bz#1791570] +- Resolves: bz#1791570 + (CVE-2020-7039 qemu-kvm-ma: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-7.8]) + +* Mon Jan 06 2020 Miroslav Rezanina - 2.12.0-42.el7 +- kvm-target-i386-Export-TAA_NO-bit-to-guests.patch [bz#1779530] +- kvm-target-i386-add-support-for-MSR_IA32_TSX_CTRL.patch [bz#1779530] +- Resolves: bz#1779530 + (CVE-2019-11135 qemu-kvm-rhev: hw: TSX Transaction Asynchronous Abort (TAA) 
[rhel-7.8]) + +* Tue Dec 10 2019 Miroslav Rezanina - 2.12.0-41.el7 +- kvm-mc146818rtc-fix-timer-interrupt-reinjection.patch [bz#1639098] +- kvm-Revert-mc146818rtc-fix-timer-interrupt-reinjection.patch [bz#1639098] +- kvm-mc146818rtc-fix-timer-interrupt-reinjection-again.patch [bz#1639098] +- Resolves: bz#1639098 + (After host update, older windows clients have large time drift) + +* Wed Dec 04 2019 Miroslav Rezanina - 2.12.0-40.el7 +- kvm-vhost-fix-vhost_log-size-overflow-during-migration.patch [bz#1775251] +- Resolves: bz#1775251 + (qemu-kvm crashes when Windows VM is migrated with multiqueue) + +* Tue Dec 03 2019 Miroslav Rezanina - 2.12.0-39.el7 +- kvm-virtio-scsi-fixed-virtio_scsi_ctx_check-failed-when-.patch [bz#1764120] +- Resolves: bz#1764120 + ([Data plane]virtio_scsi_ctx_check: Assertion `blk_get_aio_context(d->conf.blk) == s->ctx' failed when unplug a device that running block stream on it) + +* Tue Oct 15 2019 Miroslav Rezanina - 2.12.0-38.el7 +- kvm-usb-drop-unnecessary-usb_device_post_load-checks.patch [bz#1724048] +- kvm-i386-Add-new-model-of-Cascadelake-Server.patch [bz#1638472] +- kvm-i386-Disable-OSPKE-on-Cascadelake-Server.patch [bz#1638472] +- kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-Cascadelake-.patch [bz#1638472] +- Resolves: bz#1638472 + ([Intel 7.8 Feat] qemu-kvm-rhev Introduce Cascade Lake (CLX) cpu model) +- Resolves: bz#1724048 + (Fail to migrate a rhel6.10-mt7.6 guest with dimm device) + +* Thu Sep 26 2019 Miroslav Rezanina - 2.12.0-37.el7 +- kvm-i386-halt-poll-control-MSR-support.patch [bz#1734502] +- kvm-target-i386-add-MDS-NO-feature.patch [bz#1716726] +- kvm-file-posix-Handle-undetectable-alignment.patch [bz#1743365] +- kvm-qemu-img-Enable-BDRV_REQ_MAY_UNMAP-in-convert.patch [bz#1648622] +- Resolves: bz#1648622 + ([v2v] Migration performance regression) +- Resolves: bz#1716726 + ([Intel 7.8 FEAT] MDS_NO exposure to guest - qemu-kvm-rhev) +- Resolves: bz#1734502 + (qemu-kvm: backport cpuidle-haltpoll support) +- Resolves: bz#1743365 
+ (qemu, qemu-img fail to detect alignment with XFS and Gluster/XFS on 4k block device) + +* Tue Sep 24 2019 Miroslav Rezanina - 2.12.0-36.el7 +- kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch [bz#1749723] +- kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch [bz#1746224] +- kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch [bz#1743508] +- kvm-mirror-Confirm-we-re-quiesced-only-if-the-job-is-pau.patch [bz#1665256] +- Resolves: bz#1665256 + (Live storage migration fails with: TimeoutError: Timed out during operation: cannot acquire state change lock (held by monitor=remoteDispatchConnectGetAllDomainStats) and the VM becomes 'Not Responding') +- Resolves: bz#1743508 + (ISST-LTE:RHV4.3 on RHEL7.6 kvm host:Power8:Tuleta-L:lotg7: call traces dumped on guest while performing guest migration (qemu-kvm-rhev)) +- Resolves: bz#1746224 + (qemu coredump: qemu-kvm: block/create.c:68: qmp_blockdev_create: Assertion `drv' failed) +- Resolves: bz#1749723 + (CVE-2019-15890 qemu-kvm-ma: QEMU: Slirp: use-after-free during packet reassembly [rhel-7]) + +* Tue Sep 17 2019 Miroslav Rezanina - 2.12.0-35.el7 +- kvm-i386-Add-new-MSR-indices-for-IA32_PRED_CMD-and-IA32_.patch [bz#1709972] +- kvm-i386-Add-CPUID-bit-and-feature-words-for-IA32_ARCH_C.patch [bz#1709972] +- kvm-Add-support-to-KVM_GET_MSR_FEATURE_INDEX_LIST-an.patch [bz#1709972] +- kvm-x86-Data-structure-changes-to-support-MSR-based-feat.patch [bz#1709972] +- kvm-x86-define-a-new-MSR-based-feature-word-FEATURE_WORD.patch [bz#1709972] +- kvm-Use-KVM_GET_MSR_INDEX_LIST-for-MSR_IA32_ARCH_CAP.patch [bz#1709972] +- kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch [bz#1709972] +- kvm-i386-Make-arch_capabilities-migratable.patch [bz#1709972] +- kvm-block-Remove-error-messages-in-bdrv_make_zero.patch [bz#1648622] +- kvm-block-Add-BDRV_REQ_NO_FALLBACK.patch [bz#1648622] +- kvm-block-Advertise-BDRV_REQ_NO_FALLBACK-in-filter-drive.patch [bz#1648622] +- 
kvm-file-posix-Fix-write_zeroes-with-unmap-on-block-devi.patch [bz#1648622] +- kvm-file-posix-Factor-out-raw_thread_pool_submit.patch [bz#1648622] +- kvm-file-posix-Avoid-aio_worker-for-QEMU_AIO_WRITE_ZEROE.patch [bz#1648622] +- kvm-file-posix-Support-BDRV_REQ_NO_FALLBACK-for-zero-wri.patch [bz#1648622] +- kvm-qemu-img-Use-BDRV_REQ_NO_FALLBACK-for-pre-zeroing.patch [bz#1648622] +- kvm-qemu-io-Add-write-n-for-BDRV_REQ_NO_FALLBACK.patch [bz#1648622] +- kvm-qxl-check-release-info-object.patch [bz#1712704] +- kvm-ccid-Fix-dwProtocols-advertisement-of-T-0.patch [bz#1721522] +- kvm-vl-add-qemu_add_vm_change_state_handler_prio.patch [bz#1673546] +- kvm-qdev-add-qdev_add_vm_change_state_handler.patch [bz#1673546] +- kvm-virtio-scsi-restart-DMA-after-iothread.patch [bz#1673546] +- kvm-block-Drain-source-node-in-bdrv_replace_node.patch [bz#1711643] +- Resolves: bz#1648622 + ([v2v] Migration performance regression) +- Resolves: bz#1673546 + (QEMU gets stuck on resume/cont call from libvirt) +- Resolves: bz#1709972 + ([Intel 7.8 Bug] [KVM][CLX] CPUID_7_0_EDX_ARCH_CAPABILITIES is not enabled in VM qemu-kvm-rhev) +- Resolves: bz#1711643 + (qemu aborts in blockCommit: qemu-kvm: block.c:3486: bdrv_replace_node: Assertion `!({ _Static_assert(!(sizeof(*&from->in_flight) > 8), "not expecting: " "sizeof(*&from->in_flight) > ATOMIC_REG_SIZE"); __atomic_load_n(&from->in_flight, 0); })' failed.) 
+- Resolves: bz#1712704 + (CVE-2019-12155 qemu-kvm-rhev: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-7]) +- Resolves: bz#1721522 + (ccid: Fix incorrect dwProtocol advertisement of T=0) + +* Thu Sep 05 2019 Miroslav Rezanina - 2.12.0-34.el7 +- kvm-Fix-heap-overflow-in-ip_reass-on-big-packet-input.patch [bz#1734753 bz#1735653] +- Resolves: bz#1734753 + (CVE-2019-14378 qemu-kvm-rhev: QEMU: slirp: heap buffer overflow during packet reassembly [rhel-7.8]) +- Resolves: bz#1735653 + (CVE-2019-14378 qemu-kvm-ma: QEMU: slirp: heap buffer overflow during packet reassembly [rhel-7.8]) * Thu Jun 20 2019 Miroslav Rezanina - 2.12.0-33.el7 - kvm-vhost_net-don-t-set-backend-for-the-uninitialized-vi.patch [bz#1608226]