From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Mon, 1 Feb 2021 15:48:11 +0100 Subject: [PATCH 01/53] x86/hyperv: Load/save the Isolation Configuration leaf If bit 22 of Group B Features is set, the guest has access to the Isolation Configuration CPUID leaf. On x86, the first four bits of EAX in this leaf provide the isolation type of the partition; we entail three isolation types: 'SNP' (hardware-based isolation), 'VBS' (software-based isolation), and 'NONE' (no isolation). Signed-off-by: Andrea Parri (Microsoft) Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Arnd Bergmann Cc: x86@kernel.org Cc: linux-arch@vger.kernel.org Link: https://lore.kernel.org/r/20210201144814.2701-2-parri.andrea@gmail.com Reviewed-by: Michael Kelley Signed-off-by: Wei Liu (cherry picked from commit a6c76bb08dc7f7ff2b1c381002eb6c7211746182) --- arch/x86/hyperv/hv_init.c | 15 +++++++++++++++ arch/x86/include/asm/hyperv-tlfs.h | 15 +++++++++++++++ arch/x86/kernel/cpu/mshyperv.c | 9 +++++++++ include/asm-generic/hyperv-tlfs.h | 1 + include/asm-generic/mshyperv.h | 5 +++++ 5 files changed, 45 insertions(+) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 6375967a8244..6608d50d7aaa 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -555,3 +556,17 @@ bool hv_is_hibernation_supported(void) return acpi_sleep_state_supported(ACPI_STATE_S4); } EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); + +enum hv_isolation_type hv_get_isolation_type(void) +{ + if (!(ms_hyperv.features_b & HV_ISOLATION)) + return HV_ISOLATION_TYPE_NONE; + return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); +} +EXPORT_SYMBOL_GPL(hv_get_isolation_type); + +bool hv_is_isolation_supported(void) +{ + return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; +} +EXPORT_SYMBOL_GPL(hv_is_isolation_supported); diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 0ed20e8bba9e..cc878049c39b 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -22,6 +22,7 @@ #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 #define HYPERV_CPUID_NESTED_FEATURES 0x4000000A +#define HYPERV_CPUID_ISOLATION_CONFIG 0x4000000C #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 #define HYPERV_CPUID_MIN 0x40000005 @@ -115,6 +116,20 @@ #define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) #define HV_X64_NESTED_MSR_BITMAP BIT(19) +/* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */ +#define HV_PARAVISOR_PRESENT BIT(0) + +/* HYPERV_CPUID_ISOLATION_CONFIG.EBX bits. */ +#define HV_ISOLATION_TYPE GENMASK(3, 0) +#define HV_SHARED_GPA_BOUNDARY_ACTIVE BIT(5) +#define HV_SHARED_GPA_BOUNDARY_BITS GENMASK(11, 6) + +enum hv_isolation_type { + HV_ISOLATION_TYPE_NONE = 0, + HV_ISOLATION_TYPE_VBS = 1, + HV_ISOLATION_TYPE_SNP = 2 +}; + /* Hyper-V specific model specific registers (MSRs) */ /* MSR used to identify the guest OS. */ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 65d11711cd7b..47a95d543658 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -243,6 +243,7 @@ static void __init ms_hyperv_init_platform(void) * Extract the features and hints */ ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); + ms_hyperv.features_b = cpuid_ebx(HYPERV_CPUID_FEATURES); ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); @@ -277,6 +278,14 @@ static void __init ms_hyperv_init_platform(void) x86_platform.calibrate_cpu = hv_get_tsc_khz; } + if (ms_hyperv.features_b & HV_ISOLATION) { + ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG); + ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG); + + pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", + ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); + } + if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) { ms_hyperv.nested_features = cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index e73a11850055..20d3cd950204 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -89,6 +89,7 @@ #define HV_ACCESS_STATS BIT(8) #define HV_DEBUGGING BIT(11) #define HV_CPU_POWER_MANAGEMENT BIT(12) +#define HV_ISOLATION BIT(22) /* diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index c57799684170..dff58a3db5d5 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -27,11 +27,14 @@ struct ms_hyperv_info { u32 features; + u32 features_b; u32 misc_features; u32 hints; u32 nested_features; u32 max_vp_index; u32 max_lp_index; + u32 isolation_config_a; + u32 isolation_config_b; }; extern struct ms_hyperv_info ms_hyperv; @@ -169,6 +172,8 @@ void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die); void hyperv_report_panic_msg(phys_addr_t pa, size_t size); bool hv_is_hyperv_initialized(void); bool hv_is_hibernation_supported(void); +enum hv_isolation_type hv_get_isolation_type(void); +bool hv_is_isolation_supported(void); void hyperv_cleanup(void); #else /* CONFIG_HYPERV */ static inline bool hv_is_hyperv_initialized(void) { return false; } -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:26 +0000 Subject: [PATCH 02/53] x86/hyperv: handling hypercall page setup for root When Linux is running as the root partition, the hypercall page will have already been setup by Hyper-V. Copy the content over to the allocated page. Add checks to hv_suspend & co to bail early because they are not supported in this setup yet. Signed-off-by: Lillian Grassin-Drake Signed-off-by: Sunil Muthuswamy Signed-off-by: Nuno Das Neves Co-Developed-by: Lillian Grassin-Drake Co-Developed-by: Sunil Muthuswamy Co-Developed-by: Nuno Das Neves Signed-off-by: Wei Liu Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-8-wei.liu@kernel.org (cherry picked from commit 80f73c9f7468b15484e3ee4a29870fc9fa0419cc) --- arch/x86/hyperv/hv_init.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 6608d50d7aaa..e85d3199a3da 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -27,6 +27,7 @@ #include #include #include +#include int hyperv_init_cpuhp; @@ -265,6 +266,9 @@ static int hv_suspend(void) union hv_x64_msr_hypercall_contents hypercall_msr; int ret; + if (hv_root_partition) + return -EPERM; + /* * Reset the hypercall page as it is going to be invalidated * accross hibernation. Setting hv_hypercall_pg to NULL ensures @@ -409,8 +413,35 @@ void __init hyperv_init(void) rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); hypercall_msr.enable = 1; - hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); - wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + if (hv_root_partition) { + struct page *pg; + void *src, *dst; + + /* + * For the root partition, the hypervisor will set up its + * hypercall page. The hypervisor guarantees it will not show + * up in the root's address space. The root can't change the + * location of the hypercall page. + * + * Order is important here. We must enable the hypercall page + * so it is populated with code, then copy the code to an + * executable page. + */ + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + pg = vmalloc_to_page(hv_hypercall_pg); + dst = kmap(pg); + src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE, + MEMREMAP_WB); + BUG_ON(!(src && dst)); + memcpy(dst, src, HV_HYP_PAGE_SIZE); + memunmap(src); + kunmap(pg); + } else { + hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + } /* * hyperv_init() is called before LAPIC is initialized: see @@ -553,7 +584,7 @@ EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); bool hv_is_hibernation_supported(void) { - return acpi_sleep_state_supported(ACPI_STATE_S4); + return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); } EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:23 +0000 Subject: [PATCH 03/53] clocksource/hyperv: use MSR-based access if running as root When Linux runs as the root partition, the setup required for TSC page is different. Luckily Linux also has access to the MSR based clocksource. We can just disable the TSC page clocksource if Linux is the root partition. Signed-off-by: Wei Liu Acked-by: Daniel Lezcano Reviewed-by: Pavel Tatashin Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-5-wei.liu@kernel.org (cherry picked from commit 7d4163c8315729140ad99d6e1ab10dfc7a685640) --- drivers/clocksource/hyperv_timer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index ba04cb381cd3..269a691bd2c4 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -426,6 +426,9 @@ static bool __init hv_init_tsc_clocksource(void) if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) return false; + if (hv_root_partition) + return false; + hv_read_reference_counter = read_hv_clock_tsc; phys_addr = virt_to_phys(hv_get_tsc_page()); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:24 +0000 Subject: [PATCH 04/53] x86/hyperv: allocate output arg pages if required When Linux runs as the root partition, it will need to make hypercalls which return data from the hypervisor. Allocate pages for storing results when Linux runs as the root partition. Signed-off-by: Lillian Grassin-Drake Co-Developed-by: Lillian Grassin-Drake Signed-off-by: Wei Liu Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-6-wei.liu@kernel.org (cherry picked from commit 5d0f077e0f413b7eca827b16ea8bfc4569e3946c) --- arch/x86/hyperv/hv_init.c | 35 ++++++++++++++++++++++++++++----- arch/x86/include/asm/mshyperv.h | 1 + 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index e85d3199a3da..6c576c256e15 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -46,6 +46,9 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page); void __percpu **hyperv_pcpu_input_arg; EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); +void __percpu **hyperv_pcpu_output_arg; +EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); + u32 hv_max_vp_index; EXPORT_SYMBOL_GPL(hv_max_vp_index); @@ -78,12 +81,19 @@ static int hv_cpu_init(unsigned int cpu) void **input_arg; struct page *pg; - input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ - pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL); + pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ? 1 : 0); if (unlikely(!pg)) return -ENOMEM; + + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); *input_arg = page_address(pg); + if (hv_root_partition) { + void **output_arg; + + output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); + *output_arg = page_address(pg + 1); + } hv_get_vp_index(msr_vp_index); @@ -210,14 +220,23 @@ static int hv_cpu_die(unsigned int cpu) unsigned int new_cpu; unsigned long flags; void **input_arg; - void *input_pg = NULL; + void *pg; local_irq_save(flags); input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - input_pg = *input_arg; + pg = *input_arg; *input_arg = NULL; + + if (hv_root_partition) { + void **output_arg; + + output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); + *output_arg = NULL; + } + local_irq_restore(flags); - free_page((unsigned long)input_pg); + + free_pages((unsigned long)pg, hv_root_partition ? 1 : 0); if (hv_vp_assist_page && hv_vp_assist_page[cpu]) wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); @@ -373,6 +392,12 @@ void __init hyperv_init(void) BUG_ON(hyperv_pcpu_input_arg == NULL); + /* Allocate the per-CPU state for output arg for root */ + if (hv_root_partition) { + hyperv_pcpu_output_arg = alloc_percpu(void *); + BUG_ON(hyperv_pcpu_output_arg == NULL); + } + /* Allocate percpu VP index */ hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), GFP_KERNEL); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 30f76b966857..cf881d0c7c9d 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -78,6 +78,7 @@ extern int hyperv_init_cpuhp; extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; +extern void __percpu **hyperv_pcpu_output_arg; static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:21 +0000 Subject: [PATCH 05/53] x86/hyperv: detect if Linux is the root partition For now we can use the privilege flag to check. Stash the value to be used later. Put in a bunch of defines for future use when we want to have more fine-grained detection. Signed-off-by: Wei Liu Reviewed-by: Pavel Tatashin Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-3-wei.liu@kernel.org (cherry picked from commit e997720202b363ba8000d769f114e3c2c5822227) --- arch/x86/include/asm/hyperv-tlfs.h | 10 ++++++++++ arch/x86/include/asm/mshyperv.h | 2 ++ arch/x86/kernel/cpu/mshyperv.c | 20 ++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index cc878049c39b..9cf3118a80d4 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -21,6 +21,7 @@ #define HYPERV_CPUID_FEATURES 0x40000003 #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 +#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007 #define HYPERV_CPUID_NESTED_FEATURES 0x4000000A #define HYPERV_CPUID_ISOLATION_CONFIG 0x4000000C @@ -104,6 +105,15 @@ /* Recommend using enlightened VMCS */ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) +/* + * CPU management features identification. + * These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits. + */ +#define HV_X64_START_LOGICAL_PROCESSOR BIT(0) +#define HV_X64_CREATE_ROOT_VIRTUAL_PROCESSOR BIT(1) +#define HV_X64_PERFORMANCE_COUNTER_SYNC BIT(2) +#define HV_X64_RESERVED_IDENTITY_BIT BIT(31) + /* * Virtual processor will never share a physical core with another virtual * processor, except for virtual processors that are reported as sibling SMT diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index cf881d0c7c9d..ef06cdac8444 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -240,6 +240,8 @@ int hyperv_fill_flush_guest_mapping_list( struct hv_guest_mapping_flush_list *flush, u64 start_gfn, u64 end_gfn); +extern bool hv_root_partition; + #ifdef CONFIG_X86_64 void hv_apic_init(void); void __init hv_init_spinlocks(void); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 47a95d543658..e8fc8d297b6b 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -32,6 +32,10 @@ #include #include +/* Is Linux running as the root partition? */ +bool hv_root_partition; +EXPORT_SYMBOL_GPL(hv_root_partition); + struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); @@ -256,6 +260,22 @@ static void __init ms_hyperv_init_platform(void) pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n", ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); + /* + * Check CPU management privilege. + * + * To mirror what Windows does we should extract CPU management + * features and use the ReservedIdentityBit to detect if Linux is the + * root partition. But that requires negotiating CPU management + * interface (a process to be finalized). + * + * For now, use the privilege flag as the indicator for running as + * root. + */ + if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) { + hv_root_partition = true; + pr_info("Hyper-V: running as root partition\n"); + } + /* * Extract host information. */ -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:29 +0000 Subject: [PATCH 06/53] x86/hyperv: implement and use hv_smp_prepare_cpus Microsoft Hypervisor requires the root partition to make a few hypercalls to setup application processors before they can be used. Signed-off-by: Lillian Grassin-Drake Signed-off-by: Sunil Muthuswamy Co-Developed-by: Lillian Grassin-Drake Co-Developed-by: Sunil Muthuswamy Signed-off-by: Wei Liu Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-11-wei.liu@kernel.org (cherry picked from commit 333abaf5abb396820c4c7c26a8eecc7523c99184) --- arch/x86/kernel/cpu/mshyperv.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index e8fc8d297b6b..9fde437f53ea 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -31,6 +31,7 @@ #include #include #include +#include /* Is Linux running as the root partition? */ bool hv_root_partition; @@ -230,6 +231,32 @@ static void __init hv_smp_prepare_boot_cpu(void) hv_init_spinlocks(); #endif } + +static void __init hv_smp_prepare_cpus(unsigned int max_cpus) +{ +#ifdef CONFIG_X86_64 + int i; + int ret; +#endif + + native_smp_prepare_cpus(max_cpus); + +#ifdef CONFIG_X86_64 + for_each_present_cpu(i) { + if (i == 0) + continue; + ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i)); + BUG_ON(ret); + } + + for_each_present_cpu(i) { + if (i == 0) + continue; + ret = hv_call_create_vp(numa_cpu_node(i), hv_current_partition_id, i, i); + BUG_ON(ret); + } +#endif +} #endif static void __init ms_hyperv_init_platform(void) @@ -393,6 +420,8 @@ static void __init ms_hyperv_init_platform(void) # ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; + if (hv_root_partition) + smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus; # endif /* -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:20 +0000 Subject: [PATCH 07/53] asm-generic/hyperv: change HV_CPU_POWER_MANAGEMENT to HV_CPU_MANAGEMENT This makes the name match Hyper-V TLFS. Signed-off-by: Wei Liu Reviewed-by: Vitaly Kuznetsov Reviewed-by: Pavel Tatashin Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-2-wei.liu@kernel.org (cherry picked from commit 8f1d14cb835672cd27f6533f22f4c73e60a30727) --- include/asm-generic/hyperv-tlfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 20d3cd950204..e232ddcb0a2d 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -88,7 +88,7 @@ #define HV_CONNECT_PORT BIT(7) #define HV_ACCESS_STATS BIT(8) #define HV_DEBUGGING BIT(11) -#define HV_CPU_POWER_MANAGEMENT BIT(12) +#define HV_CPU_MANAGEMENT BIT(12) #define HV_ISOLATION BIT(22) -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:32 +0000 Subject: [PATCH 08/53] asm-generic/hyperv: introduce hv_device_id and auxiliary structures We will need to identify the device we want Microsoft Hypervisor to manipulate. Introduce the data structures for that purpose. They will be used in a later patch. Signed-off-by: Sunil Muthuswamy Co-Developed-by: Sunil Muthuswamy Signed-off-by: Wei Liu Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-14-wei.liu@kernel.org (cherry picked from commit 12434e5fb6aed4655340ce74cd2a0dd859dff5bd) --- include/asm-generic/hyperv-tlfs.h | 79 +++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index e232ddcb0a2d..ccc81c277d09 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -495,4 +495,83 @@ struct hv_set_vp_registers_input { } element[]; } __packed; +enum hv_device_type { + HV_DEVICE_TYPE_LOGICAL = 0, + HV_DEVICE_TYPE_PCI = 1, + HV_DEVICE_TYPE_IOAPIC = 2, + HV_DEVICE_TYPE_ACPI = 3, +}; + +typedef u16 hv_pci_rid; +typedef u16 hv_pci_segment; +typedef u64 hv_logical_device_id; +union hv_pci_bdf { + u16 as_uint16; + + struct { + u8 function:3; + u8 device:5; + u8 bus; + }; +} __packed; + +union hv_pci_bus_range { + u16 as_uint16; + + struct { + u8 subordinate_bus; + u8 secondary_bus; + }; +} __packed; + +union hv_device_id { + u64 as_uint64; + + struct { + u64 reserved0:62; + u64 device_type:2; + }; + + /* HV_DEVICE_TYPE_LOGICAL */ + struct { + u64 id:62; + u64 device_type:2; + } logical; + + /* HV_DEVICE_TYPE_PCI */ + struct { + union { + hv_pci_rid rid; + union hv_pci_bdf bdf; + }; + + hv_pci_segment segment; + union hv_pci_bus_range shadow_bus_range; + + u16 phantom_function_bits:2; + u16 source_shadow:1; + + u16 rsvdz0:11; + u16 device_type:2; + } pci; + + /* HV_DEVICE_TYPE_IOAPIC */ + struct { + u8 ioapic_id; + u8 rsvdz0; + u16 rsvdz1; + u16 rsvdz2; + + u16 rsvdz3:14; + u16 device_type:2; + } ioapic; + + /* HV_DEVICE_TYPE_ACPI */ + struct { + u32 input_mapping_base; + u32 input_mapping_count:30; + u32 device_type:2; + } acpi; +} __packed; + #endif -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:25 +0000 Subject: [PATCH 09/53] x86/hyperv: extract partition ID from Microsoft Hypervisor if necessary We will need the partition ID for executing some hypercalls later. Signed-off-by: Lillian Grassin-Drake Co-Developed-by: Sunil Muthuswamy Signed-off-by: Wei Liu Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-7-wei.liu@kernel.org (cherry picked from commit 99a0f46af6a7715147e81c558d558021aad4e207) --- arch/x86/hyperv/hv_init.c | 26 ++++++++++++++++++++++++++ arch/x86/include/asm/mshyperv.h | 2 ++ include/asm-generic/hyperv-tlfs.h | 6 ++++++ 3 files changed, 34 insertions(+) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 6c576c256e15..aeea8fbf3c23 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -30,6 +30,8 @@ #include int hyperv_init_cpuhp; +u64 hv_current_partition_id = ~0ull; +EXPORT_SYMBOL_GPL(hv_current_partition_id); void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); @@ -358,6 +360,24 @@ static void __init hv_stimer_setup_percpu_clockev(void) old_setup_percpu_clockev(); } +static void __init hv_get_partition_id(void) +{ + struct hv_get_partition_id *output_page; + u64 status; + unsigned long flags; + + local_irq_save(flags); + output_page = *this_cpu_ptr(hyperv_pcpu_output_arg); + status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page); + if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { + /* No point in proceeding if this failed */ + pr_err("Failed to get partition ID: %lld\n", status); + BUG(); + } + hv_current_partition_id = output_page->partition_id; + local_irq_restore(flags); +} + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -485,6 +505,12 @@ void __init hyperv_init(void) register_syscore_ops(&hv_syscore_ops); hyperv_init_cpuhp = cpuhp; + + if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID) + hv_get_partition_id(); + + BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull); + return; remove_cpuhp_state: diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ef06cdac8444..b8324202d850 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -80,6 +80,8 @@ extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; extern void __percpu **hyperv_pcpu_output_arg; +extern u64 hv_current_partition_id; + static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { u64 input_address = input ? virt_to_phys(input) : 0; diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index ccc81c277d09..cbc13b7c7022 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -142,6 +142,7 @@ struct ms_hyperv_tsc_page { #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 #define HVCALL_SEND_IPI_EX 0x0015 +#define HVCALL_GET_PARTITION_ID 0x0046 #define HVCALL_GET_VP_REGISTERS 0x0050 #define HVCALL_SET_VP_REGISTERS 0x0051 #define HVCALL_POST_MESSAGE 0x005c @@ -408,6 +409,11 @@ struct hv_tlb_flush_ex { u64 gva_list[]; } __packed; +/* HvGetPartitionId hypercall (output only) */ +struct hv_get_partition_id { + u64 partition_id; +} __packed; + /* HvRetargetDeviceInterrupt hypercall */ union hv_msi_entry { u64 as_uint64; -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:28 +0000 Subject: [PATCH 10/53] x86/hyperv: provide a bunch of helper functions They are used to deposit pages into Microsoft Hypervisor and bring up logical and virtual processors. Signed-off-by: Lillian Grassin-Drake Signed-off-by: Sunil Muthuswamy Signed-off-by: Nuno Das Neves Co-Developed-by: Lillian Grassin-Drake Co-Developed-by: Sunil Muthuswamy Co-Developed-by: Nuno Das Neves Signed-off-by: Wei Liu Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-10-wei.liu@kernel.org (cherry picked from commit 86b5ec3552f3c09694e6f7934834b0a2a3aeebbe) --- arch/x86/hyperv/Makefile | 2 +- arch/x86/hyperv/hv_proc.c | 219 ++++++++++++++++++++++++++++++ arch/x86/include/asm/mshyperv.h | 4 + include/asm-generic/hyperv-tlfs.h | 67 +++++++++ 4 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 arch/x86/hyperv/hv_proc.c diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index 89b1f74d3225..565358020921 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y := hv_init.o mmu.o nested.o -obj-$(CONFIG_X86_64) += hv_apic.o +obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o ifdef CONFIG_X86_64 obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c new file mode 100644 index 000000000000..60461e598239 --- /dev/null +++ b/arch/x86/hyperv/hv_proc.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * See struct hv_deposit_memory. The first u64 is partition ID, the rest + * are GPAs. + */ +#define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1) + +/* Deposits exact number of pages. Must be called with interrupts enabled. */ +int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) +{ + struct page **pages, *page; + int *counts; + int num_allocations; + int i, j, page_count; + int order; + u64 status; + int ret; + u64 base_pfn; + struct hv_deposit_memory *input_page; + unsigned long flags; + + if (num_pages > HV_DEPOSIT_MAX) + return -E2BIG; + if (!num_pages) + return 0; + + /* One buffer for page pointers and counts */ + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + pages = page_address(page); + + counts = kcalloc(HV_DEPOSIT_MAX, sizeof(int), GFP_KERNEL); + if (!counts) { + free_page((unsigned long)pages); + return -ENOMEM; + } + + /* Allocate all the pages before disabling interrupts */ + i = 0; + + while (num_pages) { + /* Find highest order we can actually allocate */ + order = 31 - __builtin_clz(num_pages); + + while (1) { + pages[i] = alloc_pages_node(node, GFP_KERNEL, order); + if (pages[i]) + break; + if (!order) { + ret = -ENOMEM; + num_allocations = i; + goto err_free_allocations; + } + --order; + } + + split_page(pages[i], order); + counts[i] = 1 << order; + num_pages -= counts[i]; + i++; + } + num_allocations = i; + + local_irq_save(flags); + + input_page = *this_cpu_ptr(hyperv_pcpu_input_arg); + + input_page->partition_id = partition_id; + + /* Populate gpa_page_list - these will fit on the input page */ + for (i = 0, page_count = 0; i < num_allocations; ++i) { + base_pfn = page_to_pfn(pages[i]); + for (j = 0; j < counts[i]; ++j, ++page_count) + input_page->gpa_page_list[page_count] = base_pfn + j; + } + status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY, + page_count, 0, input_page, NULL); + local_irq_restore(flags); + + if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { + pr_err("Failed to deposit pages: %lld\n", status); + ret = status; + goto err_free_allocations; + } + + ret = 0; + goto free_buf; + +err_free_allocations: + for (i = 0; i < num_allocations; ++i) { + base_pfn = page_to_pfn(pages[i]); + for (j = 0; j < counts[i]; ++j) + __free_page(pfn_to_page(base_pfn + j)); + } + +free_buf: + free_page((unsigned long)pages); + kfree(counts); + return ret; +} + +int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) +{ + struct hv_add_logical_processor_in *input; + struct hv_add_logical_processor_out *output; + u64 status; + unsigned long flags; + int ret = 0; + int pxm = node_to_pxm(node); + + /* + * When adding a logical processor, the hypervisor may return + * HV_STATUS_INSUFFICIENT_MEMORY. When that happens, we deposit more + * pages and retry. + */ + do { + local_irq_save(flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + /* We don't do anything with the output right now */ + output = *this_cpu_ptr(hyperv_pcpu_output_arg); + + input->lp_index = lp_index; + input->apic_id = apic_id; + input->flags = 0; + input->proximity_domain_info.domain_id = pxm; + input->proximity_domain_info.flags.reserved = 0; + input->proximity_domain_info.flags.proximity_info_valid = 1; + input->proximity_domain_info.flags.proximity_preferred = 1; + status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR, + input, output); + local_irq_restore(flags); + + status &= HV_HYPERCALL_RESULT_MASK; + + if (status != HV_STATUS_INSUFFICIENT_MEMORY) { + if (status != HV_STATUS_SUCCESS) { + pr_err("%s: cpu %u apic ID %u, %lld\n", __func__, + lp_index, apic_id, status); + ret = status; + } + break; + } + ret = hv_call_deposit_pages(node, hv_current_partition_id, 1); + } while (!ret); + + return ret; +} + +int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) +{ + struct hv_create_vp *input; + u64 status; + unsigned long irq_flags; + int ret = 0; + int pxm = node_to_pxm(node); + + /* Root VPs don't seem to need pages deposited */ + if (partition_id != hv_current_partition_id) { + /* The value 90 is empirically determined. It may change. */ + ret = hv_call_deposit_pages(node, partition_id, 90); + if (ret) + return ret; + } + + do { + local_irq_save(irq_flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + + input->partition_id = partition_id; + input->vp_index = vp_index; + input->flags = flags; + input->subnode_type = HvSubnodeAny; + if (node != NUMA_NO_NODE) { + input->proximity_domain_info.domain_id = pxm; + input->proximity_domain_info.flags.reserved = 0; + input->proximity_domain_info.flags.proximity_info_valid = 1; + input->proximity_domain_info.flags.proximity_preferred = 1; + } else { + input->proximity_domain_info.as_uint64 = 0; + } + status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); + local_irq_restore(irq_flags); + + status &= HV_HYPERCALL_RESULT_MASK; + + if (status != HV_STATUS_INSUFFICIENT_MEMORY) { + if (status != HV_STATUS_SUCCESS) { + pr_err("%s: vcpu %u, lp %u, %lld\n", __func__, + vp_index, flags, status); + ret = status; + } + break; + } + ret = hv_call_deposit_pages(node, partition_id, 1); + + } while (!ret); + + return ret; +} + diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index b8324202d850..f9119781f2bb 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -82,6 +82,10 @@ extern void __percpu **hyperv_pcpu_output_arg; extern u64 hv_current_partition_id; +int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); +int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); +int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); + static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { u64 input_address = input ? virt_to_phys(input) : 0; diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index cbc13b7c7022..6e6a129516df 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -143,6 +143,8 @@ struct ms_hyperv_tsc_page { #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 #define HVCALL_SEND_IPI_EX 0x0015 #define HVCALL_GET_PARTITION_ID 0x0046 +#define HVCALL_DEPOSIT_MEMORY 0x0048 +#define HVCALL_CREATE_VP 0x004e #define HVCALL_GET_VP_REGISTERS 0x0050 #define HVCALL_SET_VP_REGISTERS 0x0051 #define HVCALL_POST_MESSAGE 0x005c @@ -150,6 +152,7 @@ struct ms_hyperv_tsc_page { #define HVCALL_POST_DEBUG_DATA 0x0069 #define HVCALL_RETRIEVE_DEBUG_DATA 0x006a #define HVCALL_RESET_DEBUG_SESSION 0x006b +#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 #define HVCALL_RETARGET_INTERRUPT 0x007e #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 @@ -414,6 +417,70 @@ struct hv_get_partition_id { u64 partition_id; } __packed; +/* HvDepositMemory hypercall */ +struct hv_deposit_memory { + u64 partition_id; + u64 gpa_page_list[]; +} __packed; + +struct hv_proximity_domain_flags { + u32 proximity_preferred : 1; + u32 reserved : 30; + u32 proximity_info_valid : 1; +} __packed; + +/* Not a union in windows but useful for zeroing */ +union hv_proximity_domain_info { + struct { + u32 domain_id; + struct hv_proximity_domain_flags flags; + }; + u64 as_uint64; +} __packed; + +struct hv_lp_startup_status { + u64 hv_status; + u64 substatus1; + u64 substatus2; + u64 substatus3; + u64 substatus4; + u64 substatus5; + u64 substatus6; +} __packed; + +/* HvAddLogicalProcessor hypercall */ +struct hv_add_logical_processor_in { + u32 lp_index; + u32 apic_id; + union hv_proximity_domain_info proximity_domain_info; + u64 flags; +} __packed; + +struct hv_add_logical_processor_out { + struct hv_lp_startup_status startup_status; +} __packed; + +enum HV_SUBNODE_TYPE +{ + HvSubnodeAny = 0, + HvSubnodeSocket = 1, + HvSubnodeAmdNode = 2, + HvSubnodeL3 = 3, + HvSubnodeCount = 4, + HvSubnodeInvalid = -1 +}; + +/* HvCreateVp hypercall */ +struct hv_create_vp { + u64 partition_id; + u32 vp_index; + u8 padding[3]; + u8 subnode_type; + u64 subnode_id; + union hv_proximity_domain_info proximity_domain_info; + u64 flags; +} __packed; + /* HvRetargetDeviceInterrupt hypercall */ union hv_msi_entry { u64 as_uint64; -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:33 +0000 Subject: [PATCH 11/53] asm-generic/hyperv: import data structures for mapping device interrupts Signed-off-by: Sunil Muthuswamy Co-Developed-by: Sunil Muthuswamy Signed-off-by: Wei Liu Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-15-wei.liu@kernel.org (cherry picked from commit 466a9c3f88d04152ca83e840ca940c5f700402ac) --- arch/x86/include/asm/hyperv-tlfs.h | 13 +++++++++++ include/asm-generic/hyperv-tlfs.h | 36 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 9cf3118a80d4..19ac499ab251 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -541,6 +541,19 @@ struct hv_partition_assist_pg { u32 tlb_lock_count; }; +enum hv_interrupt_type { + HV_X64_INTERRUPT_TYPE_FIXED = 0x0000, + HV_X64_INTERRUPT_TYPE_LOWESTPRIORITY = 0x0001, + HV_X64_INTERRUPT_TYPE_SMI = 0x0002, + HV_X64_INTERRUPT_TYPE_REMOTEREAD = 0x0003, + HV_X64_INTERRUPT_TYPE_NMI = 0x0004, + HV_X64_INTERRUPT_TYPE_INIT = 0x0005, + HV_X64_INTERRUPT_TYPE_SIPI = 0x0006, + HV_X64_INTERRUPT_TYPE_EXTINT = 0x0007, + HV_X64_INTERRUPT_TYPE_LOCALINT0 = 0x0008, + HV_X64_INTERRUPT_TYPE_LOCALINT1 = 0x0009, + HV_X64_INTERRUPT_TYPE_MAXIMUM = 0x000A, +}; #include diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 6e6a129516df..b6ed949be3bf 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -153,6 +153,8 @@ struct ms_hyperv_tsc_page { #define HVCALL_RETRIEVE_DEBUG_DATA 0x006a #define HVCALL_RESET_DEBUG_SESSION 0x006b #define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 +#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c +#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d #define HVCALL_RETARGET_INTERRUPT 0x007e #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 @@ -647,4 +649,38 @@ union hv_device_id { } acpi; } __packed; +enum hv_interrupt_trigger_mode { + HV_INTERRUPT_TRIGGER_MODE_EDGE = 0, + HV_INTERRUPT_TRIGGER_MODE_LEVEL = 1, +}; + +struct hv_device_interrupt_descriptor { + u32 interrupt_type; + u32 trigger_mode; + u32 vector_count; + u32 reserved; + struct hv_device_interrupt_target target; +} __packed; + +struct hv_input_map_device_interrupt { + u64 partition_id; + u64 device_id; + u64 flags; + struct hv_interrupt_entry logical_interrupt_entry; + struct hv_device_interrupt_descriptor interrupt_descriptor; +} __packed; + +struct hv_output_map_device_interrupt { + struct hv_interrupt_entry interrupt_entry; +} __packed; + +struct hv_input_unmap_device_interrupt { + u64 partition_id; + u64 device_id; + struct hv_interrupt_entry interrupt_entry; +} __packed; + +#define HV_SOURCE_SHADOW_NONE 0x0 +#define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1 + #endif -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:30 +0000 Subject: [PATCH 12/53] asm-generic/hyperv: update hv_msi_entry We will soon need to access fields inside the MSI address and MSI data fields. Introduce hv_msi_address_register and hv_msi_data_register. Fix up one user of hv_msi_entry in mshyperv.h. No functional change expected. Signed-off-by: Wei Liu Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-12-wei.liu@kernel.org (cherry picked from commit d589ae61bc27b2b9aaac0bf20a9077b6fbda32b6) --- arch/x86/include/asm/mshyperv.h | 4 ++-- include/asm-generic/hyperv-tlfs.h | 28 ++++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index f9119781f2bb..7bd4022da061 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -259,8 +259,8 @@ static inline void hv_apic_init(void) {} static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, struct msi_desc *msi_desc) { - msi_entry->address = msi_desc->msg.address_lo; - msi_entry->data = msi_desc->msg.data; + msi_entry->address.as_uint32 = msi_desc->msg.address_lo; + msi_entry->data.as_uint32 = msi_desc->msg.data; } #else /* CONFIG_HYPERV */ diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index b6ed949be3bf..2040b196fe59 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -483,12 +483,36 @@ struct hv_create_vp { u64 flags; } __packed; +union hv_msi_address_register { + u32 as_uint32; + struct { + u32 reserved1:2; + u32 destination_mode:1; + u32 redirection_hint:1; + u32 reserved2:8; + u32 destination_id:8; + u32 msi_base:12; + }; +} __packed; + +union hv_msi_data_register { + u32 as_uint32; + struct { + u32 vector:8; + u32 delivery_mode:3; + u32 reserved1:3; + u32 level_assert:1; + u32 trigger_mode:1; + u32 reserved2:16; + }; +} __packed; + /* HvRetargetDeviceInterrupt hypercall */ union hv_msi_entry { u64 as_uint64; struct { - u32 address; - u32 data; + union hv_msi_address_register address; + union hv_msi_data_register data; } __packed; }; -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:31 +0000 Subject: [PATCH 13/53] asm-generic/hyperv: update hv_interrupt_entry We will soon use the same structure to handle IO-APIC interrupts as well. Introduce an enum to identify the source and a data structure for IO-APIC RTE. While at it, update pci-hyperv.c to use the enum. No functional change. Signed-off-by: Wei Liu Acked-by: Rob Herring Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-13-wei.liu@kernel.org (cherry picked from commit b59fb7b60d47b2af3a114daf0ae198aa23921698) --- drivers/pci/controller/pci-hyperv.c | 2 +- include/asm-generic/hyperv-tlfs.h | 36 +++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index ad3e3cde1c20..7fd8cd554675 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1215,7 +1215,7 @@ static void hv_irq_unmask(struct irq_data *data) params = &hbus->retarget_msi_interrupt_params; memset(params, 0, sizeof(*params)); params->partition_id = HV_PARTITION_ID_SELF; - params->int_entry.source = 1; /* MSI(-X) */ + params->int_entry.source = HV_INTERRUPT_SOURCE_MSI; hv_set_msi_entry_from_desc(¶ms->int_entry.msi_entry, msi_desc); params->device_id = (hbus->hdev->dev_instance.b[5] << 24) | (hbus->hdev->dev_instance.b[4] << 16) | diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 2040b196fe59..83448e837ded 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -483,6 +483,11 @@ struct hv_create_vp { u64 flags; } __packed; +enum hv_interrupt_source { + HV_INTERRUPT_SOURCE_MSI = 1, /* MSI and MSI-X */ + HV_INTERRUPT_SOURCE_IOAPIC, +}; + union hv_msi_address_register { u32 as_uint32; struct { @@ -516,10 +521,37 @@ union hv_msi_entry { } __packed; }; +union hv_ioapic_rte { + u64 as_uint64; + + struct { + u32 vector:8; + u32 delivery_mode:3; + u32 destination_mode:1; + u32 delivery_status:1; + u32 interrupt_polarity:1; + u32 remote_irr:1; + u32 trigger_mode:1; + u32 interrupt_mask:1; + u32 reserved1:15; + + u32 reserved2:24; + u32 destination_id:8; + }; + + struct { + u32 low_uint32; + u32 high_uint32; + }; +} __packed; + struct hv_interrupt_entry { - u32 source; /* 1 for MSI(-X) */ + u32 source; u32 reserved1; - union hv_msi_entry msi_entry; + union { + union hv_msi_entry msi_entry; + union hv_ioapic_rte ioapic_rte; + }; } __packed; /* -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:34 +0000 Subject: [PATCH 14/53] x86/hyperv: implement an MSI domain for root partition When Linux runs as the root partition on Microsoft Hypervisor, its interrupts are remapped. Linux will need to explicitly map and unmap interrupts for hardware. Implement an MSI domain to issue the correct hypercalls. And initialize this irq domain as the default MSI irq domain. Signed-off-by: Sunil Muthuswamy Co-Developed-by: Sunil Muthuswamy Signed-off-by: Wei Liu Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-16-wei.liu@kernel.org (cherry picked from commit e39397d1fd6851bef4dfb63a631b8e15d1f43329) --- arch/x86/hyperv/Makefile | 2 +- arch/x86/hyperv/hv_init.c | 9 + arch/x86/hyperv/irqdomain.c | 360 ++++++++++++++++++++++++++++++++ arch/x86/include/asm/mshyperv.h | 2 + 4 files changed, 372 insertions(+), 1 deletion(-) create mode 100644 arch/x86/hyperv/irqdomain.c diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index 565358020921..48e2c51464e8 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y := hv_init.o mmu.o nested.o +obj-y := hv_init.o mmu.o nested.o irqdomain.o obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o ifdef CONFIG_X86_64 diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index aeea8fbf3c23..b81047dec1da 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -511,6 +511,15 @@ void __init hyperv_init(void) BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull); +#ifdef CONFIG_PCI_MSI + /* + * If we're running as root, we want to create our own PCI MSI domain. + * We can't set this in hv_pci_init because that would be too late. + */ + if (hv_root_partition) + x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain; +#endif + return; remove_cpuhp_state: diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c new file mode 100644 index 000000000000..bddcf1d6860d --- /dev/null +++ b/arch/x86/hyperv/irqdomain.c @@ -0,0 +1,360 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor. + * + * Authors: + * Sunil Muthuswamy + * Wei Liu + */ + +#include +#include +#include + +static int hv_map_interrupt(union hv_device_id device_id, bool level, + int cpu, int vector, struct hv_interrupt_entry *entry) +{ + struct hv_input_map_device_interrupt *input; + struct hv_output_map_device_interrupt *output; + struct hv_device_interrupt_descriptor *intr_desc; + unsigned long flags; + u64 status; + int nr_bank, var_size; + + local_irq_save(flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + output = *this_cpu_ptr(hyperv_pcpu_output_arg); + + intr_desc = &input->interrupt_descriptor; + memset(input, 0, sizeof(*input)); + input->partition_id = hv_current_partition_id; + input->device_id = device_id.as_uint64; + intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED; + intr_desc->vector_count = 1; + intr_desc->target.vector = vector; + + if (level) + intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL; + else + intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE; + + intr_desc->target.vp_set.valid_bank_mask = 0; + intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K; + nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu)); + if (nr_bank < 0) { + local_irq_restore(flags); + pr_err("%s: unable to generate VP set\n", __func__); + return EINVAL; + } + intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; + + /* + * var-sized hypercall, var-size starts after vp_mask (thus + * vp_set.format does not count, but vp_set.valid_bank_mask + * does). + */ + var_size = nr_bank + 1; + + status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size, + input, output); + *entry = output->interrupt_entry; + + local_irq_restore(flags); + + if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) + pr_err("%s: hypercall failed, status %lld\n", __func__, status); + + return status & HV_HYPERCALL_RESULT_MASK; +} + +static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) +{ + unsigned long flags; + struct hv_input_unmap_device_interrupt *input; + struct hv_interrupt_entry *intr_entry; + u64 status; + + local_irq_save(flags); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + + memset(input, 0, sizeof(*input)); + intr_entry = &input->interrupt_entry; + input->partition_id = hv_current_partition_id; + input->device_id = id; + *intr_entry = *old_entry; + + status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL); + local_irq_restore(flags); + + return status & HV_HYPERCALL_RESULT_MASK; +} + +#ifdef CONFIG_PCI_MSI +struct rid_data { + struct pci_dev *bridge; + u32 rid; +}; + +static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data) +{ + struct rid_data *rd = data; + u8 bus = PCI_BUS_NUM(rd->rid); + + if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) { + rd->bridge = pdev; + rd->rid = alias; + } + + return 0; +} + +static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev) +{ + union hv_device_id dev_id; + struct rid_data data = { + .bridge = NULL, + .rid = PCI_DEVID(dev->bus->number, dev->devfn) + }; + + pci_for_each_dma_alias(dev, get_rid_cb, &data); + + dev_id.as_uint64 = 0; + dev_id.device_type = HV_DEVICE_TYPE_PCI; + dev_id.pci.segment = pci_domain_nr(dev->bus); + + dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid); + dev_id.pci.bdf.device = PCI_SLOT(data.rid); + dev_id.pci.bdf.function = PCI_FUNC(data.rid); + dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE; + + if (data.bridge) { + int pos; + + /* + * Microsoft Hypervisor requires a bus range when the bridge is + * running in PCI-X mode. + * + * To distinguish conventional vs PCI-X bridge, we can check + * the bridge's PCI-X Secondary Status Register, Secondary Bus + * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge + * Specification Revision 1.0 5.2.2.1.3. + * + * Value zero means it is in conventional mode, otherwise it is + * in PCI-X mode. + */ + + pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX); + if (pos) { + u16 status; + + pci_read_config_word(data.bridge, pos + + PCI_X_BRIDGE_SSTATUS, &status); + + if (status & PCI_X_SSTATUS_FREQ) { + /* Non-zero, PCI-X mode */ + u8 sec_bus, sub_bus; + + dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE; + + pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus); + dev_id.pci.shadow_bus_range.secondary_bus = sec_bus; + pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus); + dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus; + } + } + } + + return dev_id; +} + +static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector, + struct hv_interrupt_entry *entry) +{ + union hv_device_id device_id = hv_build_pci_dev_id(dev); + + return hv_map_interrupt(device_id, false, cpu, vector, entry); +} + +static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg) +{ + /* High address is always 0 */ + msg->address_hi = 0; + msg->address_lo = entry->msi_entry.address.as_uint32; + msg->data = entry->msi_entry.data.as_uint32; +} + +static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry); +static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct msi_desc *msidesc; + struct pci_dev *dev; + struct hv_interrupt_entry out_entry, *stored_entry; + struct irq_cfg *cfg = irqd_cfg(data); + cpumask_t *affinity; + int cpu; + u64 status; + + msidesc = irq_data_get_msi_desc(data); + dev = msi_desc_to_pci_dev(msidesc); + + if (!cfg) { + pr_debug("%s: cfg is NULL", __func__); + return; + } + + affinity = irq_data_get_effective_affinity_mask(data); + cpu = cpumask_first_and(affinity, cpu_online_mask); + + if (data->chip_data) { + /* + * This interrupt is already mapped. Let's unmap first. + * + * We don't use retarget interrupt hypercalls here because + * Microsoft Hypervisor doens't allow root to change the vector + * or specify VPs outside of the set that is initially used + * during mapping. + */ + stored_entry = data->chip_data; + data->chip_data = NULL; + + status = hv_unmap_msi_interrupt(dev, stored_entry); + + kfree(stored_entry); + + if (status != HV_STATUS_SUCCESS) { + pr_debug("%s: failed to unmap, status %lld", __func__, status); + return; + } + } + + stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC); + if (!stored_entry) { + pr_debug("%s: failed to allocate chip data\n", __func__); + return; + } + + status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry); + if (status != HV_STATUS_SUCCESS) { + kfree(stored_entry); + return; + } + + *stored_entry = out_entry; + data->chip_data = stored_entry; + entry_to_msi_msg(&out_entry, msg); + + return; +} + +static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry) +{ + return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry); +} + +static void hv_teardown_msi_irq_common(struct pci_dev *dev, struct msi_desc *msidesc, int irq) +{ + u64 status; + struct hv_interrupt_entry old_entry; + struct irq_desc *desc; + struct irq_data *data; + struct msi_msg msg; + + desc = irq_to_desc(irq); + if (!desc) { + pr_debug("%s: no irq desc\n", __func__); + return; + } + + data = &desc->irq_data; + if (!data) { + pr_debug("%s: no irq data\n", __func__); + return; + } + + if (!data->chip_data) { + pr_debug("%s: no chip data\n!", __func__); + return; + } + + old_entry = *(struct hv_interrupt_entry *)data->chip_data; + entry_to_msi_msg(&old_entry, &msg); + + kfree(data->chip_data); + data->chip_data = NULL; + + status = hv_unmap_msi_interrupt(dev, &old_entry); + + if (status != HV_STATUS_SUCCESS) { + pr_err("%s: hypercall failed, status %lld\n", __func__, status); + return; + } +} + +static void hv_msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) +{ + int i; + struct msi_desc *entry; + struct pci_dev *pdev; + + if (WARN_ON_ONCE(!dev_is_pci(dev))) + return; + + pdev = to_pci_dev(dev); + + for_each_pci_msi_entry(entry, pdev) { + if (entry->irq) { + for (i = 0; i < entry->nvec_used; i++) { + hv_teardown_msi_irq_common(pdev, entry, entry->irq + i); + irq_domain_free_irqs(entry->irq + i, 1); + } + } + } +} + +/* + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, + * which implement the MSI or MSI-X Capability Structure. + */ +static struct irq_chip hv_pci_msi_controller = { + .name = "HV-PCI-MSI", + .irq_unmask = pci_msi_unmask_irq, + .irq_mask = pci_msi_mask_irq, + .irq_ack = irq_chip_ack_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = hv_irq_compose_msi_msg, + .irq_set_affinity = msi_domain_set_affinity, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +static struct msi_domain_ops pci_msi_domain_ops = { + .domain_free_irqs = hv_msi_domain_free_irqs, + .msi_prepare = pci_msi_prepare, +}; + +static struct msi_domain_info hv_pci_msi_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_PCI_MSIX, + .ops = &pci_msi_domain_ops, + .chip = &hv_pci_msi_controller, + .handler = handle_edge_irq, + .handler_name = "edge", +}; + +struct irq_domain * __init hv_create_pci_msi_domain(void) +{ + struct irq_domain *d = NULL; + struct fwnode_handle *fn; + + fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI"); + if (fn) + d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain); + + /* No point in going further if we can't get an irq domain */ + BUG_ON(!d); + + return d; +} + +#endif /* CONFIG_PCI_MSI */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 7bd4022da061..4533773115ea 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -263,6 +263,8 @@ static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, msi_entry->data.as_uint32 = msi_desc->msg.data; } +struct irq_domain *hv_create_pci_msi_domain(void); + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 3 Feb 2021 15:04:35 +0000 Subject: [PATCH 15/53] iommu/hyperv: setup an IO-APIC IRQ remapping domain for root partition Just like MSI/MSI-X, IO-APIC interrupts are remapped by Microsoft Hypervisor when Linux runs as the root partition. Implement an IRQ domain to handle mapping and unmapping of IO-APIC interrupts. Signed-off-by: Wei Liu Acked-by: Joerg Roedel Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210203150435.27941-17-wei.liu@kernel.org (cherry picked from commit fb5ef35165a37ca63ef0227657eabd06f0a39cf9) --- arch/x86/hyperv/irqdomain.c | 25 +++++ arch/x86/include/asm/mshyperv.h | 4 + drivers/iommu/hyperv-iommu.c | 177 +++++++++++++++++++++++++++++++- 3 files changed, 203 insertions(+), 3 deletions(-) diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c index bddcf1d6860d..4421a8d92e23 100644 --- a/arch/x86/hyperv/irqdomain.c +++ b/arch/x86/hyperv/irqdomain.c @@ -358,3 +358,28 @@ struct irq_domain * __init hv_create_pci_msi_domain(void) } #endif /* CONFIG_PCI_MSI */ + +int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry) +{ + union hv_device_id device_id; + + device_id.as_uint64 = 0; + device_id.device_type = HV_DEVICE_TYPE_IOAPIC; + device_id.ioapic.ioapic_id = (u8)ioapic_id; + + return hv_unmap_interrupt(device_id.as_uint64, entry); +} +EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt); + +int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector, + struct hv_interrupt_entry *entry) +{ + union hv_device_id device_id; + + device_id.as_uint64 = 0; + device_id.device_type = HV_DEVICE_TYPE_IOAPIC; + device_id.ioapic.ioapic_id = (u8)ioapic_id; + + return hv_map_interrupt(device_id, level, cpu, vector, entry); +} +EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 4533773115ea..ccf60a809a17 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -265,6 +265,10 @@ static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, struct irq_domain *hv_create_pci_msi_domain(void); +int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, + struct hv_interrupt_entry *entry); +int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index e09e2d734c57..d08e5b9e4f0e 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "irq_remapping.h" @@ -137,29 +138,42 @@ static const struct irq_domain_ops hyperv_ir_domain_ops = { .activate = hyperv_irq_remapping_activate, }; +static const struct irq_domain_ops hyperv_root_ir_domain_ops; static int __init hyperv_prepare_irq_remapping(void) { struct fwnode_handle *fn; int i; + const char *name; + const struct irq_domain_ops *ops; if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) || !x2apic_supported()) return -ENODEV; - fn = irq_domain_alloc_named_id_fwnode("HYPERV-IR", 0); + if (hv_root_partition) { + name = "HYPERV-ROOT-IR"; + ops = &hyperv_root_ir_domain_ops; + } else { + name = "HYPERV-IR"; + ops = &hyperv_ir_domain_ops; + } + + fn = irq_domain_alloc_named_id_fwnode(name, 0); if (!fn) return -ENOMEM; ioapic_ir_domain = irq_domain_create_hierarchy(arch_get_ir_parent_domain(), - 0, IOAPIC_REMAPPING_ENTRY, fn, - &hyperv_ir_domain_ops, NULL); + 0, IOAPIC_REMAPPING_ENTRY, fn, ops, NULL); if (!ioapic_ir_domain) { irq_domain_free_fwnode(fn); return -ENOMEM; } + if (hv_root_partition) + return 0; /* The rest is only relevant to guests */ + /* * Hyper-V doesn't provide irq remapping function for * IO-APIC and so IO-APIC only accepts 8-bit APIC ID. @@ -196,4 +210,161 @@ struct irq_remap_ops hyperv_irq_remap_ops = { .get_irq_domain = hyperv_get_irq_domain, }; +/* IRQ remapping domain when Linux runs as the root partition */ +struct hyperv_root_ir_data { + u8 ioapic_id; + bool is_level; + struct hv_interrupt_entry entry; +}; + +static void +hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) +{ + u64 status; + u32 vector; + struct irq_cfg *cfg; + int ioapic_id; + struct cpumask *affinity; + int cpu; + struct hv_interrupt_entry entry; + struct hyperv_root_ir_data *data = irq_data->chip_data; + struct IO_APIC_route_entry e; + + cfg = irqd_cfg(irq_data); + affinity = irq_data_get_effective_affinity_mask(irq_data); + cpu = cpumask_first_and(affinity, cpu_online_mask); + + vector = cfg->vector; + ioapic_id = data->ioapic_id; + + if (data->entry.source == HV_DEVICE_TYPE_IOAPIC + && data->entry.ioapic_rte.as_uint64) { + entry = data->entry; + + status = hv_unmap_ioapic_interrupt(ioapic_id, &entry); + + if (status != HV_STATUS_SUCCESS) + pr_debug("%s: unexpected unmap status %lld\n", __func__, status); + + data->entry.ioapic_rte.as_uint64 = 0; + data->entry.source = 0; /* Invalid source */ + } + + + status = hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu, + vector, &entry); + + if (status != HV_STATUS_SUCCESS) { + pr_err("%s: map hypercall failed, status %lld\n", __func__, status); + return; + } + + data->entry = entry; + + /* Turn it into an IO_APIC_route_entry, and generate MSI MSG. */ + e.w1 = entry.ioapic_rte.low_uint32; + e.w2 = entry.ioapic_rte.high_uint32; + + memset(msg, 0, sizeof(*msg)); + msg->arch_data.vector = e.vector; + msg->arch_data.delivery_mode = e.delivery_mode; + msg->arch_addr_lo.dest_mode_logical = e.dest_mode_logical; + msg->arch_addr_lo.dmar_format = e.ir_format; + msg->arch_addr_lo.dmar_index_0_14 = e.ir_index_0_14; +} + +static int hyperv_root_ir_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + struct irq_data *parent = data->parent_data; + struct irq_cfg *cfg = irqd_cfg(data); + int ret; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; + + send_cleanup_vector(cfg); + + return 0; +} + +static struct irq_chip hyperv_root_ir_chip = { + .name = "HYPERV-ROOT-IR", + .irq_ack = apic_ack_irq, + .irq_set_affinity = hyperv_root_ir_set_affinity, + .irq_compose_msi_msg = hyperv_root_ir_compose_msi_msg, +}; + +static int hyperv_root_irq_remapping_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, + void *arg) +{ + struct irq_alloc_info *info = arg; + struct irq_data *irq_data; + struct hyperv_root_ir_data *data; + int ret = 0; + + if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1) + return -EINVAL; + + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret < 0) + return ret; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + irq_domain_free_irqs_common(domain, virq, nr_irqs); + return -ENOMEM; + } + + irq_data = irq_domain_get_irq_data(domain, virq); + if (!irq_data) { + kfree(data); + irq_domain_free_irqs_common(domain, virq, nr_irqs); + return -EINVAL; + } + + data->ioapic_id = info->devid; + data->is_level = info->ioapic.is_level; + + irq_data->chip = &hyperv_root_ir_chip; + irq_data->chip_data = data; + + return 0; +} + +static void hyperv_root_irq_remapping_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + struct irq_data *irq_data; + struct hyperv_root_ir_data *data; + struct hv_interrupt_entry *e; + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + + if (irq_data && irq_data->chip_data) { + data = irq_data->chip_data; + e = &data->entry; + + if (e->source == HV_DEVICE_TYPE_IOAPIC + && e->ioapic_rte.as_uint64) + hv_unmap_ioapic_interrupt(data->ioapic_id, + &data->entry); + + kfree(data); + } + } + + irq_domain_free_irqs_common(domain, virq, nr_irqs); +} + +static const struct irq_domain_ops hyperv_root_ir_domain_ops = { + .select = hyperv_irq_remapping_select, + .alloc = hyperv_root_irq_remapping_alloc, + .free = hyperv_root_irq_remapping_free, +}; + #endif -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 2 Mar 2021 13:38:13 -0800 Subject: [PATCH 16/53] Drivers: hv: vmbus: Move Hyper-V page allocator to arch neutral code The Hyper-V page allocator functions are implemented in an architecture neutral way. Move them into the architecture neutral VMbus module so a separate implementation for ARM64 is not needed. No functional change. Signed-off-by: Michael Kelley Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/1614721102-2241-2-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit ca48739e59df31d16c27dbcd9ea2ea61d7caa9fb) --- arch/x86/hyperv/hv_init.c | 22 -------------------- arch/x86/include/asm/mshyperv.h | 5 ----- drivers/hv/hv.c | 36 +++++++++++++++++++++++++++++++++ include/asm-generic/mshyperv.h | 4 ++++ 4 files changed, 40 insertions(+), 27 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index b81047dec1da..4bdb3443b25e 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -54,28 +54,6 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); u32 hv_max_vp_index; EXPORT_SYMBOL_GPL(hv_max_vp_index); -void *hv_alloc_hyperv_page(void) -{ - BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE); - - return (void *)__get_free_page(GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); - -void *hv_alloc_hyperv_zeroed_page(void) -{ - BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE); - - return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); -} -EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page); - -void hv_free_hyperv_page(unsigned long addr) -{ - free_page(addr); -} -EXPORT_SYMBOL_GPL(hv_free_hyperv_page); - static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ccf60a809a17..ef6e968e2828 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -233,9 +233,6 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); -void *hv_alloc_hyperv_page(void); -void *hv_alloc_hyperv_zeroed_page(void); -void hv_free_hyperv_page(unsigned long addr); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); @@ -272,8 +269,6 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} -static inline void *hv_alloc_hyperv_page(void) { return NULL; } -static inline void hv_free_hyperv_page(unsigned long addr) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} static inline void hyperv_stop_tsc_emulation(void) {}; diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index f202ac7f4b3d..cca8d5ea61f0 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -36,6 +36,42 @@ int hv_init(void) return 0; } +/* + * Functions for allocating and freeing memory with size and + * alignment HV_HYP_PAGE_SIZE. These functions are needed because + * the guest page size may not be the same as the Hyper-V page + * size. We depend upon kmalloc() aligning power-of-two size + * allocations to the allocation size boundary, so that the + * allocated memory appears to Hyper-V as a page of the size + * it expects. + */ + +void *hv_alloc_hyperv_page(void) +{ + BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); + + if (PAGE_SIZE == HV_HYP_PAGE_SIZE) + return (void *)__get_free_page(GFP_KERNEL); + else + return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); +} + +void *hv_alloc_hyperv_zeroed_page(void) +{ + if (PAGE_SIZE == HV_HYP_PAGE_SIZE) + return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + else + return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); +} + +void hv_free_hyperv_page(unsigned long addr) +{ + if (PAGE_SIZE == HV_HYP_PAGE_SIZE) + free_page(addr); + else + kfree((void *)addr); +} + /* * hv_post_message - Post a message using the hypervisor message IPC. * diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index dff58a3db5d5..694b5bc3561c 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -117,6 +117,10 @@ extern u32 hv_max_vp_index; /* Sentinel value for an uninitialized entry in hv_vp_index array */ #define VP_INVAL U32_MAX +void *hv_alloc_hyperv_page(void); +void *hv_alloc_hyperv_zeroed_page(void); +void hv_free_hyperv_page(unsigned long addr); + /** * hv_cpu_number_to_vp_number() - Map CPU to VP. * @cpu_number: CPU number in Linux terms -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 2 Mar 2021 13:38:14 -0800 Subject: [PATCH 17/53] x86/hyper-v: Move hv_message_type to architecture neutral module The definition of enum hv_message_type includes arch neutral and x86/x64-specific values. Ideally there would be a way to put the arch neutral values in an arch neutral module, and the arch specific values in an arch specific module. But C doesn't provide a way to extend enum types. As a compromise, move the entire definition into an arch neutral module, to avoid duplicating the arch neutral values for x86/x64 and for ARM64. No functional change. Signed-off-by: Michael Kelley Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/1614721102-2241-3-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 5e4e6ddf8d74068fd6bb7922dabcfa2c0f506c39) --- arch/x86/include/asm/hyperv-tlfs.h | 29 ------------------------- include/asm-generic/hyperv-tlfs.h | 35 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 19ac499ab251..119cc587775a 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -281,35 +281,6 @@ struct hv_tsc_emulation_status { #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 - -/* Define hypervisor message types. */ -enum hv_message_type { - HVMSG_NONE = 0x00000000, - - /* Memory access messages. */ - HVMSG_UNMAPPED_GPA = 0x80000000, - HVMSG_GPA_INTERCEPT = 0x80000001, - - /* Timer notification messages. */ - HVMSG_TIMER_EXPIRED = 0x80000010, - - /* Error messages. */ - HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, - HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, - - /* Trace buffer complete messages. */ - HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, - - /* Platform-specific processor intercept messages. */ - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, - HVMSG_X64_MSR_INTERCEPT = 0x80010001, - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, - HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, - HVMSG_X64_APIC_EOI = 0x80010004, - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 -}; - struct hv_nested_enlightenments_control { struct { __u32 directhypercall:1; diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 83448e837ded..9cf10837d005 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -220,6 +220,41 @@ enum HV_GENERIC_SET_FORMAT { #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) #define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) +/* + * Define hypervisor message types. Some of the message types + * are x86/x64 specific, but there's no good way to separate + * them out into the arch-specific version of hyperv-tlfs.h + * because C doesn't provide a way to extend enum types. + * Keeping them all in the arch neutral hyperv-tlfs.h seems + * the least messy compromise. + */ +enum hv_message_type { + HVMSG_NONE = 0x00000000, + + /* Memory access messages. */ + HVMSG_UNMAPPED_GPA = 0x80000000, + HVMSG_GPA_INTERCEPT = 0x80000001, + + /* Timer notification messages. */ + HVMSG_TIMER_EXPIRED = 0x80000010, + + /* Error messages. */ + HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, + HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, + + /* Trace buffer complete messages. */ + HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, + + /* Platform-specific processor intercept messages. */ + HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, + HVMSG_X64_MSR_INTERCEPT = 0x80010001, + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, + HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, + HVMSG_X64_APIC_EOI = 0x80010004, + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 +}; + /* Define synthetic interrupt controller message flags. */ union hv_message_flags { __u8 asu8; -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 2 Mar 2021 13:38:20 -0800 Subject: [PATCH 18/53] clocksource/drivers/hyper-v: Handle sched_clock differences inline While the Hyper-V Reference TSC code is architecture neutral, the pv_ops.time.sched_clock() function is implemented for x86/x64, but not for ARM64. Current code calls a utility function under arch/x86 (and coming, under arch/arm64) to handle the difference. Change this approach to handle the difference inline based on whether GENERIC_SCHED_CLOCK is present. The new approach removes code under arch/* since the difference is tied more to the specifics of the Linux implementation than to the architecture. No functional change. Signed-off-by: Michael Kelley Reviewed-by: Boqun Feng Acked-by: Daniel Lezcano Link: https://lore.kernel.org/r/1614721102-2241-9-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit eb3e1d370b4c57be1acbb9de51a7deaa036eff4b) --- arch/x86/include/asm/mshyperv.h | 11 ----------- drivers/clocksource/hyperv_timer.c | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ef6e968e2828..212d34f80bb4 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -56,17 +56,6 @@ typedef int (*hyperv_fill_flush_list_func)( #define hv_get_raw_timer() rdtsc_ordered() #define hv_get_vector() HYPERVISOR_CALLBACK_VECTOR -/* - * Reference to pv_ops must be inline so objtool - * detection of noinstr violations can work correctly. - */ -static __always_inline void hv_setup_sched_clock(void *sched_clock) -{ -#ifdef CONFIG_PARAVIRT - pv_ops.time.sched_clock = sched_clock; -#endif -} - void hyperv_vector_handler(struct pt_regs *regs); static inline void hv_enable_stimer0_percpu_irq(int irq) {} diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 269a691bd2c4..cabd8453461f 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -418,6 +418,30 @@ static struct clocksource hyperv_cs_msr = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +/* + * Reference to pv_ops must be inline so objtool + * detection of noinstr violations can work correctly. + */ +#ifdef CONFIG_GENERIC_SCHED_CLOCK +static __always_inline void hv_setup_sched_clock(void *sched_clock) +{ + /* + * We're on an architecture with generic sched clock (not x86/x64). + * The Hyper-V sched clock read function returns nanoseconds, not + * the normal 100ns units of the Hyper-V synthetic clock. + */ + sched_clock_register(sched_clock, 64, NSEC_PER_SEC); +} +#elif defined CONFIG_PARAVIRT +static __always_inline void hv_setup_sched_clock(void *sched_clock) +{ + /* We're on x86/x64 *and* using PV ops */ + pv_ops.time.sched_clock = sched_clock; +} +#else /* !CONFIG_GENERIC_SCHED_CLOCK && !CONFIG_PARAVIRT */ +static __always_inline void hv_setup_sched_clock(void *sched_clock) {} +#endif /* CONFIG_GENERIC_SCHED_CLOCK */ + static bool __init hv_init_tsc_clocksource(void) { u64 tsc_msr; -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 2 Mar 2021 13:38:16 -0800 Subject: [PATCH 19/53] Drivers: hv: vmbus: Move hyperv_report_panic_msg to arch neutral code With the new Hyper-V MSR set function, hyperv_report_panic_msg() can be architecture neutral, so move it out from under arch/x86 and merge into hv_kmsg_dump(). This move also avoids needing a separate implementation under arch/arm64. No functional change. Signed-off-by: Michael Kelley Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/1614721102-2241-5-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit b548a7742791e7818bc2780b2354b9714fd8f8d9) --- arch/x86/hyperv/hv_init.c | 27 --------------------------- drivers/hv/vmbus_drv.c | 24 +++++++++++++++++++----- include/asm-generic/mshyperv.h | 1 - 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 4bdb3443b25e..f5a8f057ed89 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -571,33 +571,6 @@ void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) } EXPORT_SYMBOL_GPL(hyperv_report_panic); -/** - * hyperv_report_panic_msg - report panic message to Hyper-V - * @pa: physical address of the panic page containing the message - * @size: size of the message in the page - */ -void hyperv_report_panic_msg(phys_addr_t pa, size_t size) -{ - /* - * P3 to contain the physical address of the panic page & P4 to - * contain the size of the panic data in that page. Rest of the - * registers are no-op when the NOTIFY_MSG flag is set. - */ - wrmsrl(HV_X64_MSR_CRASH_P0, 0); - wrmsrl(HV_X64_MSR_CRASH_P1, 0); - wrmsrl(HV_X64_MSR_CRASH_P2, 0); - wrmsrl(HV_X64_MSR_CRASH_P3, pa); - wrmsrl(HV_X64_MSR_CRASH_P4, size); - - /* - * Let Hyper-V know there is crash data available along with - * the panic message. - */ - wrmsrl(HV_X64_MSR_CRASH_CTL, - (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG)); -} -EXPORT_SYMBOL_GPL(hyperv_report_panic_msg); - bool hv_is_hyperv_initialized(void) { union hv_x64_msr_hypercall_contents hypercall_msr; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a5a402e776c7..4f9a1d12aa88 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1362,22 +1362,36 @@ static void hv_kmsg_dump(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { size_t bytes_written; - phys_addr_t panic_pa; /* We are only interested in panics. */ if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg)) return; - panic_pa = virt_to_phys(hv_panic_page); - /* * Write dump contents to the page. No need to synchronize; panic should * be single-threaded. */ kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE, &bytes_written); - if (bytes_written) - hyperv_report_panic_msg(panic_pa, bytes_written); + if (!bytes_written) + return; + /* + * P3 to contain the physical address of the panic page & P4 to + * contain the size of the panic data in that page. Rest of the + * registers are no-op when the NOTIFY_MSG flag is set. + */ + hv_set_register(HV_REGISTER_CRASH_P0, 0); + hv_set_register(HV_REGISTER_CRASH_P1, 0); + hv_set_register(HV_REGISTER_CRASH_P2, 0); + hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page)); + hv_set_register(HV_REGISTER_CRASH_P4, bytes_written); + + /* + * Let Hyper-V know there is crash data available along with + * the panic message. + */ + hv_set_register(HV_REGISTER_CRASH_CTL, + (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG)); } static struct kmsg_dumper hv_kmsg_dumper = { diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 694b5bc3561c..bbc011390cbb 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -173,7 +173,6 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, } void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die); -void hyperv_report_panic_msg(phys_addr_t pa, size_t size); bool hv_is_hyperv_initialized(void); bool hv_is_hibernation_supported(void); enum hv_isolation_type hv_get_isolation_type(void); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 2 Mar 2021 13:38:21 -0800 Subject: [PATCH 20/53] clocksource/drivers/hyper-v: Set clocksource rating based on Hyper-V feature On x86/x64, the TSC clocksource is available in a Hyper-V VM only if Hyper-V provides the TSC_INVARIANT flag. The rating on the Hyper-V Reference TSC page clocksource is currently set so that it will not override the TSC clocksource in this case. Alternatively, if the TSC clocksource is not available, then the Hyper-V clocksource is used. But on ARM64, the Hyper-V Reference TSC page clocksource should override the ARM arch counter, since the Hyper-V clocksource provides scaling and offsetting during live migrations that is not provided for the ARM arch counter. To get the needed behavior for both x86/x64 and ARM64, tweak the logic by defaulting the Hyper-V Reference TSC page clocksource rating to a large value that will always override. If the Hyper-V TSC_INVARIANT flag is set, then reduce the rating so that it will not override the TSC. While the logic for getting there is slightly different, the net result in the normal cases is no functional change. Signed-off-by: Michael Kelley Acked-by: Daniel Lezcano Link: https://lore.kernel.org/r/1614721102-2241-10-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 4c78738ead4e195c7032c31fe56135c1b00e1784) --- drivers/clocksource/hyperv_timer.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index cabd8453461f..c97e1b1e6653 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -302,14 +302,6 @@ EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); * the other that uses the TSC reference page feature as defined in the * TLFS. The MSR version is for compatibility with old versions of * Hyper-V and 32-bit x86. The TSC reference page version is preferred. - * - * The Hyper-V clocksource ratings of 250 are chosen to be below the - * TSC clocksource rating of 300. In configurations where Hyper-V offers - * an InvariantTSC, the TSC is not marked "unstable", so the TSC clocksource - * is available and preferred. With the higher rating, it will be the - * default. On older hardware and Hyper-V versions, the TSC is marked - * "unstable", so no TSC clocksource is created and the selected Hyper-V - * clocksource will be the default. */ u64 (*hv_read_reference_counter)(void); @@ -378,7 +370,7 @@ static int hv_cs_enable(struct clocksource *cs) static struct clocksource hyperv_cs_tsc = { .name = "hyperv_clocksource_tsc_page", - .rating = 250, + .rating = 500, .read = read_hv_clock_tsc_cs, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, @@ -412,7 +404,7 @@ static u64 notrace read_hv_sched_clock_msr(void) static struct clocksource hyperv_cs_msr = { .name = "hyperv_clocksource_msr", - .rating = 250, + .rating = 500, .read = read_hv_clock_msr_cs, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, @@ -453,6 +445,17 @@ static bool __init hv_init_tsc_clocksource(void) if (hv_root_partition) return false; + /* + * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly + * handles frequency and offset changes due to live migration, + * pause/resume, and other VM management operations. So lower the + * Hyper-V Reference TSC rating, causing the generic TSC to be used. + * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference + * TSC will be preferred over the virtualized ARM64 arch counter. + */ + if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) + hyperv_cs_tsc.rating = 250; + hv_read_reference_counter = read_hv_clock_tsc; phys_addr = virt_to_phys(hv_get_tsc_page()); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Fri, 26 Mar 2021 14:49:42 +0800 Subject: [PATCH 21/53] x86/hyperv: remove unused linux/version.h header That header is not needed in hv_proc.c. Reported-by: Hulk Robot Signed-off-by: Yongjun Zheng Link: https://lore.kernel.org/r/20210326064942.3263776-1-zhengyongjun3@huawei.com Signed-off-by: Wei Liu (cherry picked from commit 90b9bfa4707c85c02cc1b22b57bc8abc24a6a5f0) --- arch/x86/hyperv/hv_proc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c index 60461e598239..27e17ad3ba49 100644 --- a/arch/x86/hyperv/hv_proc.c +++ b/arch/x86/hyperv/hv_proc.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include #include #include #include -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Sunil Muthuswamy Date: Tue, 23 Mar 2021 18:47:16 +0000 Subject: [PATCH 22/53] x86/Hyper-V: Support for free page reporting Linux has support for free page reporting now (36e66c554b5c) for virtualized environment. On Hyper-V when virtually backed VMs are configured, Hyper-V will advertise cold memory discard capability, when supported. This patch adds the support to hook into the free page reporting infrastructure and leverage the Hyper-V cold memory discard hint hypercall to report/free these pages back to the host. Signed-off-by: Sunil Muthuswamy Tested-by: Matheus Castello Reviewed-by: Michael Kelley Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/SN4PR2101MB0880121FA4E2FEC67F35C1DCC0649@SN4PR2101MB0880.namprd21.prod.outlook.com Signed-off-by: Wei Liu (cherry picked from commit 6dc2a774cb4fdb524b7eb0b8db74198a1b4815ea) --- arch/x86/hyperv/hv_init.c | 51 +++++++++++++++++- arch/x86/kernel/cpu/mshyperv.c | 9 ++-- drivers/hv/Kconfig | 1 + drivers/hv/hv_balloon.c | 89 +++++++++++++++++++++++++++++++ include/asm-generic/hyperv-tlfs.h | 35 +++++++++++- include/asm-generic/mshyperv.h | 3 +- 6 files changed, 180 insertions(+), 8 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index f5a8f057ed89..ea81e5608e55 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -498,6 +498,8 @@ void __init hyperv_init(void) x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain; #endif + /* Query the VMs extended capability once, so that it can be cached. */ + hv_query_ext_cap(0); return; remove_cpuhp_state: @@ -601,7 +603,7 @@ EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); enum hv_isolation_type hv_get_isolation_type(void) { - if (!(ms_hyperv.features_b & HV_ISOLATION)) + if (!(ms_hyperv.priv_high & HV_ISOLATION)) return HV_ISOLATION_TYPE_NONE; return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); } @@ -612,3 +614,50 @@ bool hv_is_isolation_supported(void) return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; } EXPORT_SYMBOL_GPL(hv_is_isolation_supported); + +/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */ +bool hv_query_ext_cap(u64 cap_query) +{ + /* + * The address of the 'hv_extended_cap' variable will be used as an + * output parameter to the hypercall below and so it should be + * compatible with 'virt_to_phys'. Which means, it's address should be + * directly mapped. Use 'static' to keep it compatible; stack variables + * can be virtually mapped, making them imcompatible with + * 'virt_to_phys'. + * Hypercall input/output addresses should also be 8-byte aligned. + */ + static u64 hv_extended_cap __aligned(8); + static bool hv_extended_cap_queried; + u64 status; + + /* + * Querying extended capabilities is an extended hypercall. Check if the + * partition supports extended hypercall, first. + */ + if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS)) + return false; + + /* Extended capabilities do not change at runtime. */ + if (hv_extended_cap_queried) + return hv_extended_cap & cap_query; + + status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL, + &hv_extended_cap); + + /* + * The query extended capabilities hypercall should not fail under + * any normal circumstances. Avoid repeatedly making the hypercall, on + * error. + */ + hv_extended_cap_queried = true; + status &= HV_HYPERCALL_RESULT_MASK; + if (status != HV_STATUS_SUCCESS) { + pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n", + status); + return false; + } + + return hv_extended_cap & cap_query; +} +EXPORT_SYMBOL_GPL(hv_query_ext_cap); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 9fde437f53ea..16a5a901cde3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -274,12 +274,13 @@ static void __init ms_hyperv_init_platform(void) * Extract the features and hints */ ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); - ms_hyperv.features_b = cpuid_ebx(HYPERV_CPUID_FEATURES); + ms_hyperv.priv_high = cpuid_ebx(HYPERV_CPUID_FEATURES); ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); - pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n", - ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features); + pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n", + ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints, + ms_hyperv.misc_features); ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); @@ -325,7 +326,7 @@ static void __init ms_hyperv_init_platform(void) x86_platform.calibrate_cpu = hv_get_tsc_khz; } - if (ms_hyperv.features_b & HV_ISOLATION) { + if (ms_hyperv.priv_high & HV_ISOLATION) { ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG); ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG); diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 79e5356a737a..66c794d92391 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -23,6 +23,7 @@ config HYPERV_UTILS config HYPERV_BALLOON tristate "Microsoft Hyper-V Balloon driver" depends on HYPERV + select PAGE_REPORTING help Select this option to enable Hyper-V Balloon driver. diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index eb56e09ae15f..61b995ff00e3 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -563,6 +564,8 @@ struct hv_dynmem_device { * The negotiated version agreed by host. */ __u32 version; + + struct page_reporting_dev_info pr_dev_info; }; static struct hv_dynmem_device dm_device; @@ -1565,6 +1568,89 @@ static void balloon_onchannelcallback(void *context) } +/* Hyper-V only supports reporting 2MB pages or higher */ +#define HV_MIN_PAGE_REPORTING_ORDER 9 +#define HV_MIN_PAGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << HV_MIN_PAGE_REPORTING_ORDER) +static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, + struct scatterlist *sgl, unsigned int nents) +{ + unsigned long flags; + struct hv_memory_hint *hint; + int i; + u64 status; + struct scatterlist *sg; + + WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES); + WARN_ON_ONCE(sgl->length < HV_MIN_PAGE_REPORTING_LEN); + local_irq_save(flags); + hint = *(struct hv_memory_hint **)this_cpu_ptr(hyperv_pcpu_input_arg); + if (!hint) { + local_irq_restore(flags); + return -ENOSPC; + } + + hint->type = HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD; + hint->reserved = 0; + for_each_sg(sgl, sg, nents, i) { + union hv_gpa_page_range *range; + + range = &hint->ranges[i]; + range->address_space = 0; + /* page reporting only reports 2MB pages or higher */ + range->page.largepage = 1; + range->page.additional_pages = + (sg->length / HV_MIN_PAGE_REPORTING_LEN) - 1; + range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB; + range->base_large_pfn = + page_to_hvpfn(sg_page(sg)) >> HV_MIN_PAGE_REPORTING_ORDER; + } + + status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0, + hint, NULL); + local_irq_restore(flags); + if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { + pr_err("Cold memory discard hypercall failed with status %llx\n", + status); + return -EINVAL; + } + + return 0; +} + +static void enable_page_reporting(void) +{ + int ret; + + /* Essentially, validating 'PAGE_REPORTING_MIN_ORDER' is big enough. */ + if (pageblock_order < HV_MIN_PAGE_REPORTING_ORDER) { + pr_debug("Cold memory discard is only supported on 2MB pages and above\n"); + return; + } + + if (!hv_query_ext_cap(HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT)) { + pr_debug("Cold memory discard hint not supported by Hyper-V\n"); + return; + } + + BUILD_BUG_ON(PAGE_REPORTING_CAPACITY > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES); + dm_device.pr_dev_info.report = hv_free_page_report; + ret = page_reporting_register(&dm_device.pr_dev_info); + if (ret < 0) { + dm_device.pr_dev_info.report = NULL; + pr_err("Failed to enable cold memory discard: %d\n", ret); + } else { + pr_info("Cold memory discard hint enabled\n"); + } +} + +static void disable_page_reporting(void) +{ + if (dm_device.pr_dev_info.report) { + page_reporting_unregister(&dm_device.pr_dev_info); + dm_device.pr_dev_info.report = NULL; + } +} + static int balloon_connect_vsp(struct hv_device *dev) { struct dm_version_request version_req; @@ -1710,6 +1796,7 @@ static int balloon_probe(struct hv_device *dev, if (ret != 0) return ret; + enable_page_reporting(); dm_device.state = DM_INITIALIZED; dm_device.thread = @@ -1724,6 +1811,7 @@ static int balloon_probe(struct hv_device *dev, probe_error: dm_device.state = DM_INIT_ERROR; dm_device.thread = NULL; + disable_page_reporting(); vmbus_close(dev->channel); #ifdef CONFIG_MEMORY_HOTPLUG unregister_memory_notifier(&hv_memory_nb); @@ -1746,6 +1834,7 @@ static int balloon_remove(struct hv_device *dev) cancel_work_sync(&dm->ha_wrk.wrk); kthread_stop(dm->thread); + disable_page_reporting(); vmbus_close(dev->channel); #ifdef CONFIG_MEMORY_HOTPLUG unregister_memory_notifier(&hv_memory_nb); diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 9cf10837d005..515c3fb06ab3 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -89,9 +89,9 @@ #define HV_ACCESS_STATS BIT(8) #define HV_DEBUGGING BIT(11) #define HV_CPU_MANAGEMENT BIT(12) +#define HV_ENABLE_EXTENDED_HYPERCALLS BIT(20) #define HV_ISOLATION BIT(22) - /* * TSC page layout. */ @@ -159,11 +159,18 @@ struct ms_hyperv_tsc_page { #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 +/* Extended hypercalls */ +#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001 +#define HV_EXT_CALL_MEMORY_HEAT_HINT 0x8003 + #define HV_FLUSH_ALL_PROCESSORS BIT(0) #define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) #define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) #define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) +/* Extended capability bits */ +#define HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT BIT(8) + enum HV_GENERIC_SET_FORMAT { HV_GENERIC_SET_SPARSE_4K, HV_GENERIC_SET_ALL, @@ -408,8 +415,10 @@ struct hv_guest_mapping_flush { * by the bitwidth of "additional_pages" in union hv_gpa_page_range. */ #define HV_MAX_FLUSH_PAGES (2048) +#define HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB 0 +#define HV_GPA_PAGE_RANGE_PAGE_SIZE_1GB 1 -/* HvFlushGuestPhysicalAddressList hypercall */ +/* HvFlushGuestPhysicalAddressList, HvExtCallMemoryHeatHint hypercall */ union hv_gpa_page_range { u64 address_space; struct { @@ -417,6 +426,12 @@ union hv_gpa_page_range { u64 largepage:1; u64 basepfn:52; } page; + struct { + u64 reserved:12; + u64 page_size:1; + u64 reserved1:8; + u64 base_large_pfn:43; + }; }; /* @@ -774,4 +789,20 @@ struct hv_input_unmap_device_interrupt { #define HV_SOURCE_SHADOW_NONE 0x0 #define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1 +/* + * The whole argument should fit in a page to be able to pass to the hypervisor + * in one hypercall. + */ +#define HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES \ + ((HV_HYP_PAGE_SIZE - sizeof(struct hv_memory_hint)) / \ + sizeof(union hv_gpa_page_range)) + +/* HvExtCallMemoryHeatHint hypercall */ +#define HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD 2 +struct hv_memory_hint { + u64 type:2; + u64 reserved:62; + union hv_gpa_page_range ranges[]; +} __packed; + #endif diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index bbc011390cbb..c749d1c4f682 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -27,7 +27,7 @@ struct ms_hyperv_info { u32 features; - u32 features_b; + u32 priv_high; u32 misc_features; u32 hints; u32 nested_features; @@ -178,6 +178,7 @@ bool hv_is_hibernation_supported(void); enum hv_isolation_type hv_get_isolation_type(void); bool hv_is_isolation_supported(void); void hyperv_cleanup(void); +bool hv_query_ext_cap(u64 cap_query); #else /* CONFIG_HYPERV */ static inline bool hv_is_hyperv_initialized(void) { return false; } static inline bool hv_is_hibernation_supported(void) { return false; } -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Joseph Salisbury Date: Fri, 16 Apr 2021 17:43:02 -0700 Subject: [PATCH 23/53] x86/hyperv: Move hv_do_rep_hypercall to asm-generic This patch makes no functional changes. It simply moves hv_do_rep_hypercall() out of arch/x86/include/asm/mshyperv.h and into asm-generic/mshyperv.h hv_do_rep_hypercall() is architecture independent, so it makes sense that it should be in the architecture independent mshyperv.h, not in the x86-specific mshyperv.h. This is done in preperation for a follow up patch which creates a consistent pattern for checking Hyper-V hypercall status. Signed-off-by: Joseph Salisbury Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1618620183-9967-1-git-send-email-joseph.salisbury@linux.microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 6523592cee4650c6aa997d69cd0045a01e07a1ef) --- arch/x86/include/asm/mshyperv.h | 32 -------------------------------- include/asm-generic/mshyperv.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 212d34f80bb4..2cf1afb55fea 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -178,38 +178,6 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) return hv_status; } -/* - * Rep hypercalls. Callers of this functions are supposed to ensure that - * rep_count and varhead_size comply with Hyper-V hypercall definition. - */ -static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, - void *input, void *output) -{ - u64 control = code; - u64 status; - u16 rep_comp; - - control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET; - control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET; - - do { - status = hv_do_hypercall(control, input, output); - if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) - return status; - - /* Bits 32-43 of status have 'Reps completed' data. */ - rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >> - HV_HYPERCALL_REP_COMP_OFFSET; - - control &= ~HV_HYPERCALL_REP_START_MASK; - control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET; - - touch_nmi_watchdog(); - } while (rep_comp < rep_count); - - return status; -} - extern struct hv_vp_assist_page **hv_vp_assist_page; static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index c749d1c4f682..2f01140bbf66 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -41,6 +41,37 @@ extern struct ms_hyperv_info ms_hyperv; extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); +/* + * Rep hypercalls. Callers of this functions are supposed to ensure that + * rep_count and varhead_size comply with Hyper-V hypercall definition. + */ +static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, + void *input, void *output) +{ + u64 control = code; + u64 status; + u16 rep_comp; + + control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET; + control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET; + + do { + status = hv_do_hypercall(control, input, output); + if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) + return status; + + /* Bits 32-43 of status have 'Reps completed' data. */ + rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >> + HV_HYPERCALL_REP_COMP_OFFSET; + + control &= ~HV_HYPERCALL_REP_START_MASK; + control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET; + + touch_nmi_watchdog(); + } while (rep_comp < rep_count); + + return status; +} /* Generate the guest OS identifier as described in the Hyper-V TLFS */ static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Joseph Salisbury Date: Fri, 16 Apr 2021 17:43:03 -0700 Subject: [PATCH 24/53] drivers: hv: Create a consistent pattern for checking Hyper-V hypercall status There is not a consistent pattern for checking Hyper-V hypercall status. Existing code uses a number of variants. The variants work, but a consistent pattern would improve the readability of the code, and be more conformant to what the Hyper-V TLFS says about hypercall status. Implemented new helper functions hv_result(), hv_result_success(), and hv_repcomp(). Changed the places where hv_do_hypercall() and related variants are used to use the helper functions. Signed-off-by: Joseph Salisbury Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1618620183-9967-2-git-send-email-joseph.salisbury@linux.microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 753ed9c95c37d058e50e7d42bbe296ee0bf6670d) --- arch/x86/hyperv/hv_apic.c | 16 +++++++++------- arch/x86/hyperv/hv_init.c | 2 +- arch/x86/hyperv/hv_proc.c | 25 ++++++++++--------------- arch/x86/hyperv/irqdomain.c | 6 +++--- arch/x86/hyperv/mmu.c | 8 ++++---- arch/x86/hyperv/nested.c | 8 ++++---- arch/x86/include/asm/mshyperv.h | 1 + drivers/hv/hv.c | 2 +- drivers/pci/controller/pci-hyperv.c | 2 +- include/asm-generic/mshyperv.h | 25 ++++++++++++++++++++----- 10 files changed, 54 insertions(+), 41 deletions(-) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 284e73661a18..ca581b24974a 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -103,7 +103,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) struct hv_send_ipi_ex *ipi_arg; unsigned long flags; int nr_bank = 0; - int ret = 1; + u64 status = HV_STATUS_INVALID_PARAMETER; if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) return false; @@ -128,19 +128,19 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) if (!nr_bank) ipi_arg->vp_set.format = HV_GENERIC_SET_ALL; - ret = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, + status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, ipi_arg, NULL); ipi_mask_ex_done: local_irq_restore(flags); - return ((ret == 0) ? true : false); + return hv_result_success(status); } static bool __send_ipi_mask(const struct cpumask *mask, int vector) { int cur_cpu, vcpu; struct hv_send_ipi ipi_arg; - int ret = 1; + u64 status; trace_hyperv_send_ipi_mask(mask, vector); @@ -184,9 +184,9 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) __set_bit(vcpu, (unsigned long *)&ipi_arg.cpu_mask); } - ret = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector, + status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector, ipi_arg.cpu_mask); - return ((ret == 0) ? true : false); + return hv_result_success(status); do_ex_hypercall: return __send_ipi_mask_ex(mask, vector); @@ -195,6 +195,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) static bool __send_ipi_one(int cpu, int vector) { int vp = hv_cpu_number_to_vp_number(cpu); + u64 status; trace_hyperv_send_ipi_one(cpu, vector); @@ -207,7 +208,8 @@ static bool __send_ipi_one(int cpu, int vector) if (vp >= 64) return __send_ipi_mask_ex(cpumask_of(cpu), vector); - return !hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp)); + status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp)); + return hv_result_success(status); } static void hv_send_ipi(int cpu, int vector) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index ea81e5608e55..1c194e287866 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -347,7 +347,7 @@ static void __init hv_get_partition_id(void) local_irq_save(flags); output_page = *this_cpu_ptr(hyperv_pcpu_output_arg); status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page); - if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { + if (!hv_result_success(status)) { /* No point in proceeding if this failed */ pr_err("Failed to get partition ID: %lld\n", status); BUG(); diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c index 27e17ad3ba49..68a0843d4750 100644 --- a/arch/x86/hyperv/hv_proc.c +++ b/arch/x86/hyperv/hv_proc.c @@ -92,10 +92,9 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY, page_count, 0, input_page, NULL); local_irq_restore(flags); - - if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { + if (!hv_result_success(status)) { pr_err("Failed to deposit pages: %lld\n", status); - ret = status; + ret = hv_result(status); goto err_free_allocations; } @@ -121,7 +120,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) struct hv_add_logical_processor_out *output; u64 status; unsigned long flags; - int ret = 0; + int ret = HV_STATUS_SUCCESS; int pxm = node_to_pxm(node); /* @@ -147,13 +146,11 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) input, output); local_irq_restore(flags); - status &= HV_HYPERCALL_RESULT_MASK; - - if (status != HV_STATUS_INSUFFICIENT_MEMORY) { - if (status != HV_STATUS_SUCCESS) { + if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_success(status)) { pr_err("%s: cpu %u apic ID %u, %lld\n", __func__, lp_index, apic_id, status); - ret = status; + ret = hv_result(status); } break; } @@ -168,7 +165,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) struct hv_create_vp *input; u64 status; unsigned long irq_flags; - int ret = 0; + int ret = HV_STATUS_SUCCESS; int pxm = node_to_pxm(node); /* Root VPs don't seem to need pages deposited */ @@ -199,13 +196,11 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); local_irq_restore(irq_flags); - status &= HV_HYPERCALL_RESULT_MASK; - - if (status != HV_STATUS_INSUFFICIENT_MEMORY) { - if (status != HV_STATUS_SUCCESS) { + if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_success(status)) { pr_err("%s: vcpu %u, lp %u, %lld\n", __func__, vp_index, flags, status); - ret = status; + ret = hv_result(status); } break; } diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c index 4421a8d92e23..514fc64e23d5 100644 --- a/arch/x86/hyperv/irqdomain.c +++ b/arch/x86/hyperv/irqdomain.c @@ -63,10 +63,10 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, local_irq_restore(flags); - if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) + if (!hv_result_success(status)) pr_err("%s: hypercall failed, status %lld\n", __func__, status); - return status & HV_HYPERCALL_RESULT_MASK; + return hv_result(status); } static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) @@ -88,7 +88,7 @@ static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL); local_irq_restore(flags); - return status & HV_HYPERCALL_RESULT_MASK; + return hv_result(status); } #ifdef CONFIG_PCI_MSI diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 2c87350c1fb0..c0ba8874d9cb 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -58,7 +58,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, int cpu, vcpu, gva_n, max_gvas; struct hv_tlb_flush **flush_pcpu; struct hv_tlb_flush *flush; - u64 status = U64_MAX; + u64 status; unsigned long flags; trace_hyperv_mmu_flush_tlb_others(cpus, info); @@ -161,7 +161,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, check_status: local_irq_restore(flags); - if (!(status & HV_HYPERCALL_RESULT_MASK)) + if (hv_result_success(status)) return; do_native: native_flush_tlb_others(cpus, info); @@ -176,7 +176,7 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, u64 status; if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) - return U64_MAX; + return HV_STATUS_INVALID_PARAMETER; flush_pcpu = (struct hv_tlb_flush_ex **) this_cpu_ptr(hyperv_pcpu_input_arg); @@ -201,7 +201,7 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus); if (nr_bank < 0) - return U64_MAX; + return HV_STATUS_INVALID_PARAMETER; /* * We can flush not more than max_gvas with one hypercall. Flush the diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c index dd0a843f766d..5d70968c8538 100644 --- a/arch/x86/hyperv/nested.c +++ b/arch/x86/hyperv/nested.c @@ -47,7 +47,7 @@ int hyperv_flush_guest_mapping(u64 as) flush, NULL); local_irq_restore(flags); - if (!(status & HV_HYPERCALL_RESULT_MASK)) + if (hv_result_success(status)) ret = 0; fault: @@ -92,7 +92,7 @@ int hyperv_flush_guest_mapping_range(u64 as, { struct hv_guest_mapping_flush_list **flush_pcpu; struct hv_guest_mapping_flush_list *flush; - u64 status = 0; + u64 status; unsigned long flags; int ret = -ENOTSUPP; int gpa_n = 0; @@ -125,10 +125,10 @@ int hyperv_flush_guest_mapping_range(u64 as, local_irq_restore(flags); - if (!(status & HV_HYPERCALL_RESULT_MASK)) + if (hv_result_success(status)) ret = 0; else - ret = status; + ret = hv_result(status); fault: trace_hyperv_nested_flush_guest_mapping_range(as, ret); return ret; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 2cf1afb55fea..a34f31dd5d93 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -9,6 +9,7 @@ #include #include #include +#include typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index cca8d5ea61f0..3b1ba6c607a1 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -104,7 +104,7 @@ int hv_post_message(union hv_connection_id connection_id, */ put_cpu_ptr(hv_cpu); - return status & 0xFFFF; + return hv_result(status); } int hv_synic_alloc(void) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 7fd8cd554675..98aa92742198 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1291,7 +1291,7 @@ static void hv_irq_unmask(struct irq_data *data) * resumes, hv_pci_restore_msi_state() is able to correctly restore * the interrupt with the correct affinity. */ - if (res && hbus->state != hv_pcibus_removing) + if (!hv_result_success(res) && hbus->state != hv_pcibus_removing) dev_err(&hbus->hdev->device, "%s() failed: %#llx", __func__, res); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 2f01140bbf66..7a2492fb0ee1 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -41,6 +41,24 @@ extern struct ms_hyperv_info ms_hyperv; extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); +/* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */ +static inline int hv_result(u64 status) +{ + return status & HV_HYPERCALL_RESULT_MASK; +} + +static inline bool hv_result_success(u64 status) +{ + return hv_result(status) == HV_STATUS_SUCCESS; +} + +static inline unsigned int hv_repcomp(u64 status) +{ + /* Bits [43:32] of status have 'Reps completed' data. */ + return (status & HV_HYPERCALL_REP_COMP_MASK) >> + HV_HYPERCALL_REP_COMP_OFFSET; +} + /* * Rep hypercalls. Callers of this functions are supposed to ensure that * rep_count and varhead_size comply with Hyper-V hypercall definition. @@ -57,12 +75,10 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, do { status = hv_do_hypercall(control, input, output); - if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) + if (!hv_result_success(status)) return status; - /* Bits 32-43 of status have 'Reps completed' data. */ - rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >> - HV_HYPERCALL_REP_COMP_OFFSET; + rep_comp = hv_repcomp(status); control &= ~HV_HYPERCALL_REP_START_MASK; control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET; @@ -87,7 +103,6 @@ static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, return guest_id; } - /* Free the message slot and signal end-of-message if required */ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) { -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 12 May 2021 01:06:49 -0700 Subject: [PATCH 25/53] PCI: hv: Remove bus device removal unused refcount/functions With the new method of flushing/stopping the workqueue before doing bus removal, the old mechanism of using refcount and wait for completion is no longer needed. Remove those dead code. Link: https://lore.kernel.org/r/1620806809-31055-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Long Li [lorenzo.pieralisi@arm.com: Reworded subject] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Michael Kelley (cherry picked from commit 326dc2e1e59a98c61c3c71616496422af522678c) --- drivers/pci/controller/pci-hyperv.c | 34 +++-------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 98aa92742198..64df545b6dcf 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -452,7 +452,6 @@ struct hv_pcibus_device { /* Protocol version negotiated with the host */ enum pci_protocol_version_t protocol_version; enum hv_pcibus_state state; - refcount_t remove_lock; struct hv_device *hdev; resource_size_t low_mmio_space; resource_size_t high_mmio_space; @@ -460,7 +459,6 @@ struct hv_pcibus_device { struct resource *low_mmio_res; struct resource *high_mmio_res; struct completion *survey_event; - struct completion remove_event; struct pci_bus *pci_bus; spinlock_t config_lock; /* Avoid two threads writing index page */ spinlock_t device_list_lock; /* Protect lists below */ @@ -593,9 +591,6 @@ static void put_pcichild(struct hv_pci_dev *hpdev) kfree(hpdev); } -static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus); -static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus); - /* * There is no good way to get notified from vmbus_onoffer_rescind(), * so let's use polling here, since this is not a hot path. @@ -2067,10 +2062,8 @@ static void pci_devices_present_work(struct work_struct *work) } spin_unlock_irqrestore(&hbus->device_list_lock, flags); - if (!dr) { - put_hvpcibus(hbus); + if (!dr) return; - } /* First, mark all existing children as reported missing. */ spin_lock_irqsave(&hbus->device_list_lock, flags); @@ -2153,7 +2146,6 @@ static void pci_devices_present_work(struct work_struct *work) break; } - put_hvpcibus(hbus); kfree(dr); } @@ -2194,12 +2186,10 @@ static int hv_pci_start_relations_work(struct hv_pcibus_device *hbus, list_add_tail(&dr->list_entry, &hbus->dr_list); spin_unlock_irqrestore(&hbus->device_list_lock, flags); - if (pending_dr) { + if (pending_dr) kfree(dr_wrk); - } else { - get_hvpcibus(hbus); + else queue_work(hbus->wq, &dr_wrk->wrk); - } return 0; } @@ -2342,8 +2332,6 @@ static void hv_eject_device_work(struct work_struct *work) put_pcichild(hpdev); put_pcichild(hpdev); /* hpdev has been freed. Do not use it any more. */ - - put_hvpcibus(hbus); } /** @@ -2367,7 +2355,6 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev) hpdev->state = hv_pcichild_ejecting; get_pcichild(hpdev); INIT_WORK(&hpdev->wrk, hv_eject_device_work); - get_hvpcibus(hbus); queue_work(hbus->wq, &hpdev->wrk); } @@ -2967,17 +2954,6 @@ static int hv_send_resources_released(struct hv_device *hdev) return 0; } -static void get_hvpcibus(struct hv_pcibus_device *hbus) -{ - refcount_inc(&hbus->remove_lock); -} - -static void put_hvpcibus(struct hv_pcibus_device *hbus) -{ - if (refcount_dec_and_test(&hbus->remove_lock)) - complete(&hbus->remove_event); -} - #define HVPCI_DOM_MAP_SIZE (64 * 1024) static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE); @@ -3097,14 +3073,12 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->sysdata.domain = dom; hbus->hdev = hdev; - refcount_set(&hbus->remove_lock, 1); INIT_LIST_HEAD(&hbus->children); INIT_LIST_HEAD(&hbus->dr_list); INIT_LIST_HEAD(&hbus->resources_for_children); spin_lock_init(&hbus->config_lock); spin_lock_init(&hbus->device_list_lock); spin_lock_init(&hbus->retarget_msi_interrupt_lock); - init_completion(&hbus->remove_event); hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0, hbus->sysdata.domain); if (!hbus->wq) { @@ -3348,8 +3322,6 @@ static int hv_pci_remove(struct hv_device *hdev) hv_pci_free_bridge_windows(hbus); irq_domain_remove(hbus->irq_domain); irq_domain_free_fwnode(hbus->sysdata.fwnode); - put_hvpcibus(hbus); - wait_for_completion(&hbus->remove_event); hv_put_dom_num(hbus->sysdata.domain); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 18 May 2021 17:36:18 +0100 Subject: [PATCH 26/53] arm64: smccc: Add support for SMCCCv1.2 extended input/output registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SMCCC v1.2 allows x8-x17 to be used as parameter registers and x4—x17 to be used as result registers in SMC64/HVC64. Arm Firmware Framework for Armv8-A specification makes use of x0-x7 as parameter and result registers. There are other users like Hyper-V who intend to use beyond x0-x7 as well. Current SMCCC interface in the kernel just use x0-x7 as parameter and x0-x3 as result registers as required by SMCCCv1.0. Let us add new interface to support this extended set of input/output registers namely x0-x17 as both parameter and result registers. Acked-by: Mark Rutland Tested-by: Michael Kelley Reviewed-by: Michael Kelley Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Sudeep Holla Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210518163618.43950-1-sudeep.holla@arm.com Signed-off-by: Will Deacon (cherry picked from commit 3fdc0cb59d97f87e2cc708d424f1538e31744286) --- arch/arm64/kernel/asm-offsets.c | 9 ++++++ arch/arm64/kernel/smccc-call.S | 57 +++++++++++++++++++++++++++++++++ include/linux/arm-smccc.h | 55 +++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 7d32fc959b1a..652b9e974fdc 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -122,6 +122,15 @@ int main(void) DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id)); DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state)); + DEFINE(ARM_SMCCC_1_2_REGS_X0_OFFS, offsetof(struct arm_smccc_1_2_regs, a0)); + DEFINE(ARM_SMCCC_1_2_REGS_X2_OFFS, offsetof(struct arm_smccc_1_2_regs, a2)); + DEFINE(ARM_SMCCC_1_2_REGS_X4_OFFS, offsetof(struct arm_smccc_1_2_regs, a4)); + DEFINE(ARM_SMCCC_1_2_REGS_X6_OFFS, offsetof(struct arm_smccc_1_2_regs, a6)); + DEFINE(ARM_SMCCC_1_2_REGS_X8_OFFS, offsetof(struct arm_smccc_1_2_regs, a8)); + DEFINE(ARM_SMCCC_1_2_REGS_X10_OFFS, offsetof(struct arm_smccc_1_2_regs, a10)); + DEFINE(ARM_SMCCC_1_2_REGS_X12_OFFS, offsetof(struct arm_smccc_1_2_regs, a12)); + DEFINE(ARM_SMCCC_1_2_REGS_X14_OFFS, offsetof(struct arm_smccc_1_2_regs, a14)); + DEFINE(ARM_SMCCC_1_2_REGS_X16_OFFS, offsetof(struct arm_smccc_1_2_regs, a16)); BLANK(); DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S index d62447964ed9..2def9d0dd3dd 100644 --- a/arch/arm64/kernel/smccc-call.S +++ b/arch/arm64/kernel/smccc-call.S @@ -43,3 +43,60 @@ SYM_FUNC_START(__arm_smccc_hvc) SMCCC hvc SYM_FUNC_END(__arm_smccc_hvc) EXPORT_SYMBOL(__arm_smccc_hvc) + + .macro SMCCC_1_2 instr + /* Save `res` and free a GPR that won't be clobbered */ + stp x1, x19, [sp, #-16]! + + /* Ensure `args` won't be clobbered while loading regs in next step */ + mov x19, x0 + + /* Load the registers x0 - x17 from the struct arm_smccc_1_2_regs */ + ldp x0, x1, [x19, #ARM_SMCCC_1_2_REGS_X0_OFFS] + ldp x2, x3, [x19, #ARM_SMCCC_1_2_REGS_X2_OFFS] + ldp x4, x5, [x19, #ARM_SMCCC_1_2_REGS_X4_OFFS] + ldp x6, x7, [x19, #ARM_SMCCC_1_2_REGS_X6_OFFS] + ldp x8, x9, [x19, #ARM_SMCCC_1_2_REGS_X8_OFFS] + ldp x10, x11, [x19, #ARM_SMCCC_1_2_REGS_X10_OFFS] + ldp x12, x13, [x19, #ARM_SMCCC_1_2_REGS_X12_OFFS] + ldp x14, x15, [x19, #ARM_SMCCC_1_2_REGS_X14_OFFS] + ldp x16, x17, [x19, #ARM_SMCCC_1_2_REGS_X16_OFFS] + + \instr #0 + + /* Load the `res` from the stack */ + ldr x19, [sp] + + /* Store the registers x0 - x17 into the result structure */ + stp x0, x1, [x19, #ARM_SMCCC_1_2_REGS_X0_OFFS] + stp x2, x3, [x19, #ARM_SMCCC_1_2_REGS_X2_OFFS] + stp x4, x5, [x19, #ARM_SMCCC_1_2_REGS_X4_OFFS] + stp x6, x7, [x19, #ARM_SMCCC_1_2_REGS_X6_OFFS] + stp x8, x9, [x19, #ARM_SMCCC_1_2_REGS_X8_OFFS] + stp x10, x11, [x19, #ARM_SMCCC_1_2_REGS_X10_OFFS] + stp x12, x13, [x19, #ARM_SMCCC_1_2_REGS_X12_OFFS] + stp x14, x15, [x19, #ARM_SMCCC_1_2_REGS_X14_OFFS] + stp x16, x17, [x19, #ARM_SMCCC_1_2_REGS_X16_OFFS] + + /* Restore original x19 */ + ldp xzr, x19, [sp], #16 + ret +.endm + +/* + * void arm_smccc_1_2_hvc(const struct arm_smccc_1_2_regs *args, + * struct arm_smccc_1_2_regs *res); + */ +SYM_FUNC_START(arm_smccc_1_2_hvc) + SMCCC_1_2 hvc +SYM_FUNC_END(arm_smccc_1_2_hvc) +EXPORT_SYMBOL(arm_smccc_1_2_hvc) + +/* + * void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs *args, + * struct arm_smccc_1_2_regs *res); + */ +SYM_FUNC_START(arm_smccc_1_2_smc) + SMCCC_1_2 smc +SYM_FUNC_END(arm_smccc_1_2_smc) +EXPORT_SYMBOL(arm_smccc_1_2_smc) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index f860645f6512..ee2d336ff1c1 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -155,6 +155,61 @@ struct arm_smccc_res { unsigned long a3; }; +#ifdef CONFIG_ARM64 +/** + * struct arm_smccc_1_2_regs - Arguments for or Results from SMC/HVC call + * @a0-a17 argument values from registers 0 to 17 + */ +struct arm_smccc_1_2_regs { + unsigned long a0; + unsigned long a1; + unsigned long a2; + unsigned long a3; + unsigned long a4; + unsigned long a5; + unsigned long a6; + unsigned long a7; + unsigned long a8; + unsigned long a9; + unsigned long a10; + unsigned long a11; + unsigned long a12; + unsigned long a13; + unsigned long a14; + unsigned long a15; + unsigned long a16; + unsigned long a17; +}; + +/** + * arm_smccc_1_2_hvc() - make HVC calls + * @args: arguments passed via struct arm_smccc_1_2_regs + * @res: result values via struct arm_smccc_1_2_regs + * + * This function is used to make HVC calls following SMC Calling Convention + * v1.2 or above. The content of the supplied param are copied from the + * structure to registers prior to the HVC instruction. The return values + * are updated with the content from registers on return from the HVC + * instruction. + */ +asmlinkage void arm_smccc_1_2_hvc(const struct arm_smccc_1_2_regs *args, + struct arm_smccc_1_2_regs *res); + +/** + * arm_smccc_1_2_smc() - make SMC calls + * @args: arguments passed via struct arm_smccc_1_2_regs + * @res: result values via struct arm_smccc_1_2_regs + * + * This function is used to make SMC calls following SMC Calling Convention + * v1.2 or above. The content of the supplied param are copied from the + * structure to registers prior to the SMC instruction. The return values + * are updated with the content from registers on return from the SMC + * instruction. + */ +asmlinkage void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs *args, + struct arm_smccc_1_2_regs *res); +#endif + /** * struct arm_smccc_quirk - Contains quirk information * @id: quirk identification -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 2 Jun 2021 14:36:44 -0700 Subject: [PATCH 27/53] Drivers: hv: Move Hyper-V extended capability check to arch neutral code The extended capability query code is currently under arch/x86, but it is architecture neutral, and is used by arch neutral code in the Hyper-V balloon driver. Hence the balloon driver fails to build on other architectures. Fix by moving the ext cap code out from arch/x86. Because it is also called from built-in architecture specific code, it can't be in a module, so the Makefile treats as built-in even when CONFIG_HYPERV is "m". Also drivers/Makefile is tweaked because this is the first occurrence of a Hyper-V file that is built-in even when CONFIG_HYPERV is "m". While here, update the hypercall status check to use the new helper function instead of open coding. No functional change. Signed-off-by: Michael Kelley Reviewed-by: Sunil Muthuswamy Link: https://lore.kernel.org/r/1622669804-2016-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit a4d7e8ae4a541557d7a2c815835b786c18c3613c) --- arch/x86/hyperv/hv_init.c | 47 ---------------------------- drivers/Makefile | 2 +- drivers/hv/Makefile | 3 ++ drivers/hv/hv_common.c | 66 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 48 deletions(-) create mode 100644 drivers/hv/hv_common.c diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 1c194e287866..f879eb81b06d 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -614,50 +614,3 @@ bool hv_is_isolation_supported(void) return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; } EXPORT_SYMBOL_GPL(hv_is_isolation_supported); - -/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */ -bool hv_query_ext_cap(u64 cap_query) -{ - /* - * The address of the 'hv_extended_cap' variable will be used as an - * output parameter to the hypercall below and so it should be - * compatible with 'virt_to_phys'. Which means, it's address should be - * directly mapped. Use 'static' to keep it compatible; stack variables - * can be virtually mapped, making them imcompatible with - * 'virt_to_phys'. - * Hypercall input/output addresses should also be 8-byte aligned. - */ - static u64 hv_extended_cap __aligned(8); - static bool hv_extended_cap_queried; - u64 status; - - /* - * Querying extended capabilities is an extended hypercall. Check if the - * partition supports extended hypercall, first. - */ - if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS)) - return false; - - /* Extended capabilities do not change at runtime. */ - if (hv_extended_cap_queried) - return hv_extended_cap & cap_query; - - status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL, - &hv_extended_cap); - - /* - * The query extended capabilities hypercall should not fail under - * any normal circumstances. Avoid repeatedly making the hypercall, on - * error. - */ - hv_extended_cap_queried = true; - status &= HV_HYPERCALL_RESULT_MASK; - if (status != HV_STATUS_SUCCESS) { - pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n", - status); - return false; - } - - return hv_extended_cap & cap_query; -} -EXPORT_SYMBOL_GPL(hv_query_ext_cap); diff --git a/drivers/Makefile b/drivers/Makefile index 576228037718..bd462459aa67 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -160,7 +160,7 @@ obj-$(CONFIG_SOUNDWIRE) += soundwire/ # Virtualization drivers obj-$(CONFIG_VIRT_DRIVERS) += virt/ -obj-$(CONFIG_HYPERV) += hv/ +obj-$(subst m,y,$(CONFIG_HYPERV)) += hv/ obj-$(CONFIG_PM_DEVFREQ) += devfreq/ obj-$(CONFIG_EXTCON) += extcon/ diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile index 94daf8240c95..d76df5c8c2a9 100644 --- a/drivers/hv/Makefile +++ b/drivers/hv/Makefile @@ -11,3 +11,6 @@ hv_vmbus-y := vmbus_drv.o \ channel_mgmt.o ring_buffer.o hv_trace.o hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_fcopy.o hv_utils_transport.o + +# Code that must be built-in +obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c new file mode 100644 index 000000000000..f0053c786891 --- /dev/null +++ b/drivers/hv/hv_common.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Architecture neutral utility routines for interacting with + * Hyper-V. This file is specifically for code that must be + * built-in to the kernel image when CONFIG_HYPERV is set + * (vs. being in a module) because it is called from architecture + * specific code under arch/. + * + * Copyright (C) 2021, Microsoft, Inc. + * + * Author : Michael Kelley + */ + +#include +#include +#include +#include +#include + + +/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */ +bool hv_query_ext_cap(u64 cap_query) +{ + /* + * The address of the 'hv_extended_cap' variable will be used as an + * output parameter to the hypercall below and so it should be + * compatible with 'virt_to_phys'. Which means, it's address should be + * directly mapped. Use 'static' to keep it compatible; stack variables + * can be virtually mapped, making them imcompatible with + * 'virt_to_phys'. + * Hypercall input/output addresses should also be 8-byte aligned. + */ + static u64 hv_extended_cap __aligned(8); + static bool hv_extended_cap_queried; + u64 status; + + /* + * Querying extended capabilities is an extended hypercall. Check if the + * partition supports extended hypercall, first. + */ + if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS)) + return false; + + /* Extended capabilities do not change at runtime. */ + if (hv_extended_cap_queried) + return hv_extended_cap & cap_query; + + status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL, + &hv_extended_cap); + + /* + * The query extended capabilities hypercall should not fail under + * any normal circumstances. Avoid repeatedly making the hypercall, on + * error. + */ + hv_extended_cap_queried = true; + if (!hv_result_success(status)) { + pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n", + status); + return false; + } + + return hv_extended_cap & cap_query; +} +EXPORT_SYMBOL_GPL(hv_query_ext_cap); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 27 Jul 2021 02:06:52 +0800 Subject: [PATCH 28/53] arm64: PCI: Restructure pcibios_root_bridge_prepare() Restructure the pcibios_root_bridge_prepare() as the preparation for supporting cases when no real ACPI device is related to the PCI host bridge. No functional change. Link: https://lore.kernel.org/r/20210726180657.142727-4-boqun.feng@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Lorenzo Pieralisi Acked-by: Catalin Marinas (cherry picked from commit b424d4d4263200459615c87ad8dddaf4bb571a9d) --- arch/arm64/kernel/pci.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 1006ed2d7c60..5148ae242780 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -82,14 +82,19 @@ int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - if (!acpi_disabled) { - struct pci_config_window *cfg = bridge->bus->sysdata; - struct acpi_device *adev = to_acpi_device(cfg->parent); - struct device *bus_dev = &bridge->bus->dev; + struct pci_config_window *cfg; + struct acpi_device *adev; + struct device *bus_dev; - ACPI_COMPANION_SET(&bridge->dev, adev); - set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); - } + if (acpi_disabled) + return 0; + + cfg = bridge->bus->sysdata; + adev = to_acpi_device(cfg->parent); + bus_dev = &bridge->bus->dev; + + ACPI_COMPANION_SET(&bridge->dev, adev); + set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); return 0; } -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 27 Jul 2021 02:06:53 +0800 Subject: [PATCH 29/53] arm64: PCI: Support root bridge preparation for Hyper-V Currently at root bridge preparation, the corresponding ACPI device will be set as the companion, however for a Hyper-V virtual PCI root bridge, there is no corresponding ACPI device, because a Hyper-V virtual PCI root bridge is discovered via VMBus rather than ACPI table. In order to support this, we need to make pcibios_root_bridge_prepare() work with cfg->parent being NULL. Use a NULL pointer as the ACPI device if there is no corresponding ACPI device, and this is fine because: 1) ACPI_COMPANION_SET() can work with the second parameter being NULL, 2) semantically, if a NULL pointer is set via ACPI_COMPANION_SET(), ACPI_COMPANION() (the read API for this field) will return NULL, and since ACPI_COMPANION() may return NULL, so users must have handled the cases where it returns NULL, and 3) since there is no corresponding ACPI device, it would be wrong to use any other value here. Link: https://lore.kernel.org/r/20210726180657.142727-5-boqun.feng@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Lorenzo Pieralisi Acked-by: Catalin Marinas (cherry picked from commit 7d40c0f70d92291605c4498b8ee4b3a3c3ba07b1) --- arch/arm64/kernel/pci.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 5148ae242780..2276689b5411 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -90,7 +90,17 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) return 0; cfg = bridge->bus->sysdata; - adev = to_acpi_device(cfg->parent); + + /* + * On Hyper-V there is no corresponding ACPI device for a root bridge, + * therefore ->parent is set as NULL by the driver. And set 'adev' as + * NULL in this case because there is no proper ACPI device. + */ + if (!cfg->parent) + adev = NULL; + else + adev = to_acpi_device(cfg->parent); + bus_dev = &bridge->bus->dev; ACPI_COMPANION_SET(&bridge->dev, adev); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 11 Jul 2021 19:50:04 -0700 Subject: [PATCH 30/53] asm-generic/hyperv: Add missing #include of nmi.h The recent move of hv_do_rep_hypercall() to this file adds a reference to touch_nmi_watchdog(). Its function definition is included indirectly when compiled on x86, but not when compiled on ARM64. So add the explicit #include. No functional change. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1626058204-2106-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit ba3f5839fbeb3f9e65070d90aa4e66008bbea80f) --- include/asm-generic/mshyperv.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 7a2492fb0ee1..2c7500e4810f 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 27 Jul 2021 02:06:55 +0800 Subject: [PATCH 31/53] PCI: hv: Set ->domain_nr of pci_host_bridge at probing time No functional change, just store and maintain the PCI domain number in the ->domain_nr of pci_host_bridge. Note that we still need to keep the copy of domain number in x86-specific pci_sysdata, because x86 is not a PCI_DOMAINS_GENERIC=y architecture, so the ->domain_nr of pci_host_bridge doesn't work for it yet. Link: https://lore.kernel.org/r/20210726180657.142727-7-boqun.feng@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Lorenzo Pieralisi (cherry picked from commit 38c0d266dc80b81f7f72314620f01ff6a1e119fe) --- drivers/pci/controller/pci-hyperv.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 64df545b6dcf..3cbb04b1e886 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -2303,7 +2303,7 @@ static void hv_eject_device_work(struct work_struct *work) * because hbus->pci_bus may not exist yet. */ wslot = wslot_to_devfn(hpdev->desc.win_slot.slot); - pdev = pci_get_domain_bus_and_slot(hbus->sysdata.domain, 0, wslot); + pdev = pci_get_domain_bus_and_slot(hbus->bridge->domain_nr, 0, wslot); if (pdev) { pci_lock_rescan_remove(); pci_stop_and_remove_bus_device(pdev); @@ -3070,6 +3070,7 @@ static int hv_pci_probe(struct hv_device *hdev, "PCI dom# 0x%hx has collision, using 0x%hx", dom_req, dom); + hbus->bridge->domain_nr = dom; hbus->sysdata.domain = dom; hbus->hdev = hdev; @@ -3080,7 +3081,7 @@ static int hv_pci_probe(struct hv_device *hdev, spin_lock_init(&hbus->device_list_lock); spin_lock_init(&hbus->retarget_msi_interrupt_lock); hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0, - hbus->sysdata.domain); + hbus->bridge->domain_nr); if (!hbus->wq) { ret = -ENOMEM; goto free_dom; @@ -3207,7 +3208,7 @@ static int hv_pci_probe(struct hv_device *hdev, destroy_wq: destroy_workqueue(hbus->wq); free_dom: - hv_put_dom_num(hbus->sysdata.domain); + hv_put_dom_num(hbus->bridge->domain_nr); free_bus: kfree(hbus); return ret; @@ -3323,7 +3324,7 @@ static int hv_pci_remove(struct hv_device *hdev) irq_domain_remove(hbus->irq_domain); irq_domain_free_fwnode(hbus->sysdata.fwnode); - hv_put_dom_num(hbus->sysdata.domain); + hv_put_dom_num(hbus->bridge->domain_nr); kfree(hbus); return ret; -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 27 Jul 2021 02:06:54 +0800 Subject: [PATCH 32/53] PCI: hv: Generify PCI probing In order to support ARM64 Hyper-V PCI, we need to set up the bridge at probing time because ARM64 is a PCI_DOMAIN_GENERIC=y arch and we don't have pci_config_window (ARM64 sysdata) for a PCI root bus on Hyper-V, so it's impossible to retrieve the information (e.g. PCI domains, MSI domains) from bus sysdata on ARM64 after creation. Originally in create_root_hv_pci_bus(), pci_create_root_bus() is used to create the root bus and the corresponding bridge based on x86 sysdata. Now we create a bridge first and then call pci_scan_root_bus_bridge(), which allows us to do the necessary set-ups for the bridge. Link: https://lore.kernel.org/r/20210726180657.142727-6-boqun.feng@gmail.com Signed-off-by: Arnd Bergmann Signed-off-by: Boqun Feng Signed-off-by: Lorenzo Pieralisi (cherry picked from commit 418cb6c8e051119125b886c879efdacb04df7165) --- drivers/pci/controller/pci-hyperv.c | 57 +++++++++++++++-------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 3cbb04b1e886..f9dc82258fdc 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -449,6 +449,7 @@ enum hv_pcibus_state { struct hv_pcibus_device { struct pci_sysdata sysdata; + struct pci_host_bridge *bridge; /* Protocol version negotiated with the host */ enum pci_protocol_version_t protocol_version; enum hv_pcibus_state state; @@ -464,8 +465,6 @@ struct hv_pcibus_device { spinlock_t device_list_lock; /* Protect lists below */ void __iomem *cfg_addr; - struct list_head resources_for_children; - struct list_head children; struct list_head dr_list; @@ -1798,7 +1797,7 @@ static void hv_pci_assign_slots(struct hv_pcibus_device *hbus) slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot)); snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser); - hpdev->pci_slot = pci_create_slot(hbus->pci_bus, slot_nr, + hpdev->pci_slot = pci_create_slot(hbus->bridge->bus, slot_nr, name, NULL); if (IS_ERR(hpdev->pci_slot)) { pr_warn("pci_create slot %s failed\n", name); @@ -1828,7 +1827,7 @@ static void hv_pci_remove_slots(struct hv_pcibus_device *hbus) static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus) { struct pci_dev *dev; - struct pci_bus *bus = hbus->pci_bus; + struct pci_bus *bus = hbus->bridge->bus; struct hv_pci_dev *hv_dev; list_for_each_entry(dev, &bus->devices, bus_list) { @@ -1851,24 +1850,25 @@ static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus) */ static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus) { - /* Register the device */ - hbus->pci_bus = pci_create_root_bus(&hbus->hdev->device, - 0, /* bus number is always zero */ - &hv_pcifront_ops, - &hbus->sysdata, - &hbus->resources_for_children); - if (!hbus->pci_bus) - return -ENODEV; + int error; + struct pci_host_bridge *bridge = hbus->bridge; + + bridge->dev.parent = &hbus->hdev->device; + bridge->sysdata = &hbus->sysdata; + bridge->ops = &hv_pcifront_ops; + + error = pci_scan_root_bus_bridge(bridge); + if (error) + return error; hbus->pci_bus->msi = &hbus->msi_chip; hbus->pci_bus->msi->dev = &hbus->hdev->device; pci_lock_rescan_remove(); - pci_scan_child_bus(hbus->pci_bus); hv_pci_assign_numa_node(hbus); - pci_bus_assign_resources(hbus->pci_bus); + pci_bus_assign_resources(bridge->bus); hv_pci_assign_slots(hbus); - pci_bus_add_devices(hbus->pci_bus); + pci_bus_add_devices(bridge->bus); pci_unlock_rescan_remove(); hbus->state = hv_pcibus_installed; return 0; @@ -2131,7 +2131,7 @@ static void pci_devices_present_work(struct work_struct *work) * because there may have been changes. */ pci_lock_rescan_remove(); - pci_scan_child_bus(hbus->pci_bus); + pci_scan_child_bus(hbus->bridge->bus); hv_pci_assign_numa_node(hbus); hv_pci_assign_slots(hbus); pci_unlock_rescan_remove(); @@ -2299,8 +2299,8 @@ static void hv_eject_device_work(struct work_struct *work) /* * Ejection can come before or after the PCI bus has been set up, so * attempt to find it and tear down the bus state, if it exists. This - * must be done without constructs like pci_domain_nr(hbus->pci_bus) - * because hbus->pci_bus may not exist yet. + * must be done without constructs like pci_domain_nr(hbus->bridge->bus) + * because hbus->bridge->bus may not exist yet. */ wslot = wslot_to_devfn(hpdev->desc.win_slot.slot); pdev = pci_get_domain_bus_and_slot(hbus->bridge->domain_nr, 0, wslot); @@ -2666,8 +2666,7 @@ static int hv_pci_allocate_bridge_windows(struct hv_pcibus_device *hbus) /* Modify this resource to become a bridge window. */ hbus->low_mmio_res->flags |= IORESOURCE_WINDOW; hbus->low_mmio_res->flags &= ~IORESOURCE_BUSY; - pci_add_resource(&hbus->resources_for_children, - hbus->low_mmio_res); + pci_add_resource(&hbus->bridge->windows, hbus->low_mmio_res); } if (hbus->high_mmio_space) { @@ -2686,8 +2685,7 @@ static int hv_pci_allocate_bridge_windows(struct hv_pcibus_device *hbus) /* Modify this resource to become a bridge window. */ hbus->high_mmio_res->flags |= IORESOURCE_WINDOW; hbus->high_mmio_res->flags &= ~IORESOURCE_BUSY; - pci_add_resource(&hbus->resources_for_children, - hbus->high_mmio_res); + pci_add_resource(&hbus->bridge->windows, hbus->high_mmio_res); } return 0; @@ -3006,6 +3004,7 @@ static void hv_put_dom_num(u16 dom) static int hv_pci_probe(struct hv_device *hdev, const struct hv_vmbus_device_id *dev_id) { + struct pci_host_bridge *bridge; struct hv_pcibus_device *hbus; u16 dom_req, dom; char *name; @@ -3018,6 +3017,10 @@ static int hv_pci_probe(struct hv_device *hdev, */ BUILD_BUG_ON(sizeof(*hbus) > HV_HYP_PAGE_SIZE); + bridge = devm_pci_alloc_host_bridge(&hdev->device, 0); + if (!bridge) + return -ENOMEM; + /* * With the recent 59bb47985c1d ("mm, sl[aou]b: guarantee natural * alignment for kmalloc(power-of-two)"), kzalloc() is able to allocate @@ -3039,6 +3042,8 @@ static int hv_pci_probe(struct hv_device *hdev, hbus = kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); if (!hbus) return -ENOMEM; + + hbus->bridge = bridge; hbus->state = hv_pcibus_init; hbus->wslot_res_allocated = -1; @@ -3076,7 +3081,6 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->hdev = hdev; INIT_LIST_HEAD(&hbus->children); INIT_LIST_HEAD(&hbus->dr_list); - INIT_LIST_HEAD(&hbus->resources_for_children); spin_lock_init(&hbus->config_lock); spin_lock_init(&hbus->device_list_lock); spin_lock_init(&hbus->retarget_msi_interrupt_lock); @@ -3307,9 +3311,9 @@ static int hv_pci_remove(struct hv_device *hdev) /* Remove the bus from PCI's point of view. */ pci_lock_rescan_remove(); - pci_stop_root_bus(hbus->pci_bus); + pci_stop_root_bus(hbus->bridge->bus); hv_pci_remove_slots(hbus); - pci_remove_root_bus(hbus->pci_bus); + pci_remove_root_bus(hbus->bridge->bus); pci_unlock_rescan_remove(); } @@ -3319,7 +3323,6 @@ static int hv_pci_remove(struct hv_device *hdev) iounmap(hbus->cfg_addr); hv_free_config_window(hbus); - pci_free_resource_list(&hbus->resources_for_children); hv_pci_free_bridge_windows(hbus); irq_domain_remove(hbus->irq_domain); irq_domain_free_fwnode(hbus->sysdata.fwnode); @@ -3402,7 +3405,7 @@ static int hv_pci_restore_msi_msg(struct pci_dev *pdev, void *arg) */ static void hv_pci_restore_msi_state(struct hv_pcibus_device *hbus) { - pci_walk_bus(hbus->pci_bus, hv_pci_restore_msi_msg, NULL); + pci_walk_bus(hbus->bridge->bus, hv_pci_restore_msi_msg, NULL); } static int hv_pci_resume(struct hv_device *hdev) -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 27 Jul 2021 02:06:50 +0800 Subject: [PATCH 33/53] PCI: Introduce domain_nr in pci_host_bridge Currently we retrieve the PCI domain number of the host bridge from the bus sysdata (or pci_config_window if PCI_DOMAINS_GENERIC=y). Actually we have the information at PCI host bridge probing time, and it makes sense that we store it into pci_host_bridge. One benefit of doing so is the requirement for supporting PCI on Hyper-V for ARM64, because the host bridge of Hyper-V doesn't have pci_config_window, whereas ARM64 is a PCI_DOMAINS_GENERIC=y arch, so we cannot retrieve the PCI domain number from pci_config_window on ARM64 Hyper-V guest. As the preparation for ARM64 Hyper-V PCI support, we introduce the domain_nr in pci_host_bridge and a sentinel value to allow drivers to set domain numbers properly at probing time. Currently CONFIG_PCI_DOMAINS_GENERIC=y archs are only users of this newly-introduced field. Link: https://lore.kernel.org/r/20210726180657.142727-2-boqun.feng@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Lorenzo Pieralisi Acked-by: Bjorn Helgaas (cherry picked from commit 15d82ca23c996d50062286d27ed6a42a8105c04a) --- drivers/pci/probe.c | 6 +++++- include/linux/pci.h | 11 +++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ece90a23936d..7e4cb5f7c9ca 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -594,6 +594,7 @@ static void pci_init_host_bridge(struct pci_host_bridge *bridge) bridge->native_pme = 1; bridge->native_ltr = 1; bridge->native_dpc = 1; + bridge->domain_nr = PCI_DOMAIN_NR_NOT_SET; device_initialize(&bridge->dev); } @@ -898,7 +899,10 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) bus->ops = bridge->ops; bus->number = bus->busn_res.start = bridge->busnr; #ifdef CONFIG_PCI_DOMAINS_GENERIC - bus->domain_nr = pci_bus_find_domain_nr(bus, parent); + if (bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET) + bus->domain_nr = pci_bus_find_domain_nr(bus, parent); + else + bus->domain_nr = bridge->domain_nr; #endif b = pci_find_bus(pci_domain_nr(bus), bridge->busnr); diff --git a/include/linux/pci.h b/include/linux/pci.h index a55097b4d992..25b4aa103029 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -521,6 +521,16 @@ static inline int pci_channel_offline(struct pci_dev *pdev) return (pdev->error_state != pci_channel_io_normal); } +/* + * Currently in ACPI spec, for each PCI host bridge, PCI Segment + * Group number is limited to a 16-bit value, therefore (int)-1 is + * not a valid PCI domain number, and can be used as a sentinel + * value indicating ->domain_nr is not set by the driver (and + * CONFIG_PCI_DOMAINS_GENERIC=y archs will set it with + * pci_bus_find_domain_nr()). + */ +#define PCI_DOMAIN_NR_NOT_SET (-1) + struct pci_host_bridge { struct device dev; struct pci_bus *bus; /* Root bus */ @@ -528,6 +538,7 @@ struct pci_host_bridge { struct pci_ops *child_ops; void *sysdata; int busnr; + int domain_nr; struct list_head windows; /* resource_entry */ struct list_head dma_ranges; /* dma ranges resource list */ u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* Platform IRQ swizzler */ -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 27 Jul 2021 02:06:51 +0800 Subject: [PATCH 34/53] PCI: Support populating MSI domains of root buses via bridges Currently, at probing time, the MSI domains of root buses are populated if either the information of MSI domain is available from firmware (DT or ACPI), or arch-specific sysdata is used to pass the fwnode of the MSI domain. These two conditions don't cover all, e.g. Hyper-V virtual PCI on ARM64, which doesn't have the MSI information in the firmware and couldn't use arch-specific sysdata because running on an architecture with PCI_DOMAINS_GENERIC=y. To support populating MSI domains of the root buses at the probing when neither of the above condition is true, the ->msi_domain of the corresponding bridge device is used: in pci_host_bridge_msi_domain(), which should return the MSI domain of the root bus, the ->msi_domain of the corresponding bridge is fetched first as a potential value of the MSI domain of the root bus. In order to use the approach to populate MSI domains, the driver needs to dev_set_msi_domain() on the bridge before calling pci_register_host_bridge(), and makes sure GENERIC_MSI_IRQ_DOMAIN=y. Another advantage of this new approach is providing an arch-independent way to populate MSI domains, which allows sharing the driver code as much as possible between architectures. Originally-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210726180657.142727-3-boqun.feng@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Lorenzo Pieralisi Acked-by: Bjorn Helgaas (cherry picked from commit 41dd40fd717997085588442821f4463e05c758cf) --- drivers/pci/probe.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 7e4cb5f7c9ca..a49ca043ded8 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -828,11 +828,15 @@ static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus) { struct irq_domain *d; + /* If the host bridge driver sets a MSI domain of the bridge, use it */ + d = dev_get_msi_domain(bus->bridge); + /* * Any firmware interface that can resolve the msi_domain * should be called from here. */ - d = pci_host_bridge_of_msi_domain(bus); + if (!d) + d = pci_host_bridge_of_msi_domain(bus); if (!d) d = pci_host_bridge_acpi_msi_domain(bus); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 4 Aug 2021 08:52:39 -0700 Subject: [PATCH 35/53] Drivers: hv: Enable Hyper-V code to be built on ARM64 Update drivers/hv/Kconfig so CONFIG_HYPERV can be selected on ARM64, causing the Hyper-V specific code to be built. Exclude the Hyper-V enlightened clocks/timers code from being built for ARM64. Signed-off-by: Michael Kelley Reviewed-by: Boqun Feng Acked-by: Marc Zyngier Acked-by: Mark Rutland Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1628092359-61351-6-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 7aff79e297ee1aa0126924921fd87a4ae59d2467) --- drivers/hv/Kconfig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 66c794d92391..d1123ceb38f3 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -4,15 +4,16 @@ menu "Microsoft Hyper-V guest support" config HYPERV tristate "Microsoft Hyper-V client drivers" - depends on X86 && ACPI && X86_LOCAL_APIC && HYPERVISOR_GUEST + depends on ACPI && ((X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \ + || (ARM64 && !CPU_BIG_ENDIAN)) select PARAVIRT - select X86_HV_CALLBACK_VECTOR + select X86_HV_CALLBACK_VECTOR if X86 help Select this option to run Linux as a Hyper-V client operating system. config HYPERV_TIMER - def_bool HYPERV + def_bool HYPERV && X86 config HYPERV_UTILS tristate "Microsoft Hyper-V Utilities driver" -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 4 Aug 2021 08:52:35 -0700 Subject: [PATCH 36/53] arm64: hyperv: Add Hyper-V hypercall and register access utilities hyperv-tlfs.h defines Hyper-V interfaces from the Hyper-V Top Level Functional Spec (TLFS), and #includes the architecture-independent part of hyperv-tlfs.h in include/asm-generic. The published TLFS is distinctly oriented to x86/x64, so the ARM64-specific hyperv-tlfs.h includes information for ARM64 that is not yet formally published. The TLFS is available here: docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs mshyperv.h defines Linux-specific structures and routines for interacting with Hyper-V on ARM64, and #includes the architecture- independent part of mshyperv.h in include/asm-generic. Use these definitions to provide utility functions to make Hyper-V hypercalls and to get and set Hyper-V provided registers associated with a virtual processor. Signed-off-by: Michael Kelley Reviewed-by: Sunil Muthuswamy Acked-by: Marc Zyngier Acked-by: Mark Rutland Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1628092359-61351-2-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 57d276bbbd322409bb6f7c6446187a29953f8ded) --- MAINTAINERS | 3 + arch/arm64/Kbuild | 1 + arch/arm64/hyperv/Makefile | 2 + arch/arm64/hyperv/hv_core.c | 129 +++++++++++++++++++++++++++ arch/arm64/include/asm/hyperv-tlfs.h | 69 ++++++++++++++ arch/arm64/include/asm/mshyperv.h | 54 +++++++++++ 6 files changed, 258 insertions(+) create mode 100644 arch/arm64/hyperv/Makefile create mode 100644 arch/arm64/hyperv/hv_core.c create mode 100644 arch/arm64/include/asm/hyperv-tlfs.h create mode 100644 arch/arm64/include/asm/mshyperv.h diff --git a/MAINTAINERS b/MAINTAINERS index 4fef10dd2975..aad9e6c8af38 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8154,6 +8154,9 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git F: Documentation/ABI/stable/sysfs-bus-vmbus F: Documentation/ABI/testing/debugfs-hyperv F: Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst +F: arch/arm64/hyperv +F: arch/arm64/include/asm/hyperv-tlfs.h +F: arch/arm64/include/asm/mshyperv.h F: arch/x86/hyperv F: arch/x86/include/asm/hyperv-tlfs.h F: arch/x86/include/asm/mshyperv.h diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild index d6465823b281..7a37608fed76 100644 --- a/arch/arm64/Kbuild +++ b/arch/arm64/Kbuild @@ -3,4 +3,5 @@ obj-y += kernel/ mm/ obj-$(CONFIG_NET) += net/ obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_XEN) += xen/ +obj-$(subst m,y,$(CONFIG_HYPERV)) += hyperv/ obj-$(CONFIG_CRYPTO) += crypto/ diff --git a/arch/arm64/hyperv/Makefile b/arch/arm64/hyperv/Makefile new file mode 100644 index 000000000000..1697d30ff106 --- /dev/null +++ b/arch/arm64/hyperv/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-y := hv_core.o diff --git a/arch/arm64/hyperv/hv_core.c b/arch/arm64/hyperv/hv_core.c new file mode 100644 index 000000000000..4c5dc0f51b12 --- /dev/null +++ b/arch/arm64/hyperv/hv_core.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Low level utility routines for interacting with Hyper-V. + * + * Copyright (C) 2021, Microsoft, Inc. + * + * Author : Michael Kelley + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * hv_do_hypercall- Invoke the specified hypercall + */ +u64 hv_do_hypercall(u64 control, void *input, void *output) +{ + struct arm_smccc_res res; + u64 input_address; + u64 output_address; + + input_address = input ? virt_to_phys(input) : 0; + output_address = output ? virt_to_phys(output) : 0; + + arm_smccc_1_1_hvc(HV_FUNC_ID, control, + input_address, output_address, &res); + return res.a0; +} +EXPORT_SYMBOL_GPL(hv_do_hypercall); + +/* + * hv_do_fast_hypercall8 -- Invoke the specified hypercall + * with arguments in registers instead of physical memory. + * Avoids the overhead of virt_to_phys for simple hypercalls. + */ + +u64 hv_do_fast_hypercall8(u16 code, u64 input) +{ + struct arm_smccc_res res; + u64 control; + + control = (u64)code | HV_HYPERCALL_FAST_BIT; + + arm_smccc_1_1_hvc(HV_FUNC_ID, control, input, &res); + return res.a0; +} +EXPORT_SYMBOL_GPL(hv_do_fast_hypercall8); + +/* + * Set a single VP register to a 64-bit value. + */ +void hv_set_vpreg(u32 msr, u64 value) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_hvc(HV_FUNC_ID, + HVCALL_SET_VP_REGISTERS | HV_HYPERCALL_FAST_BIT | + HV_HYPERCALL_REP_COMP_1, + HV_PARTITION_ID_SELF, + HV_VP_INDEX_SELF, + msr, + 0, + value, + 0, + &res); + + /* + * Something is fundamentally broken in the hypervisor if + * setting a VP register fails. There's really no way to + * continue as a guest VM, so panic. + */ + BUG_ON(!hv_result_success(res.a0)); +} +EXPORT_SYMBOL_GPL(hv_set_vpreg); + +/* + * Get the value of a single VP register. One version + * returns just 64 bits and another returns the full 128 bits. + * The two versions are separate to avoid complicating the + * calling sequence for the more frequently used 64 bit version. + */ + +void hv_get_vpreg_128(u32 msr, struct hv_get_vp_registers_output *result) +{ + struct arm_smccc_1_2_regs args; + struct arm_smccc_1_2_regs res; + + args.a0 = HV_FUNC_ID; + args.a1 = HVCALL_GET_VP_REGISTERS | HV_HYPERCALL_FAST_BIT | + HV_HYPERCALL_REP_COMP_1; + args.a2 = HV_PARTITION_ID_SELF; + args.a3 = HV_VP_INDEX_SELF; + args.a4 = msr; + + /* + * Use the SMCCC 1.2 interface because the results are in registers + * beyond X0-X3. + */ + arm_smccc_1_2_hvc(&args, &res); + + /* + * Something is fundamentally broken in the hypervisor if + * getting a VP register fails. There's really no way to + * continue as a guest VM, so panic. + */ + BUG_ON(!hv_result_success(res.a0)); + + result->as64.low = res.a6; + result->as64.high = res.a7; +} +EXPORT_SYMBOL_GPL(hv_get_vpreg_128); + +u64 hv_get_vpreg(u32 msr) +{ + struct hv_get_vp_registers_output output; + + hv_get_vpreg_128(msr, &output); + + return output.as64.low; +} +EXPORT_SYMBOL_GPL(hv_get_vpreg); diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h new file mode 100644 index 000000000000..4d964a7f02ee --- /dev/null +++ b/arch/arm64/include/asm/hyperv-tlfs.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * This file contains definitions from the Hyper-V Hypervisor Top-Level + * Functional Specification (TLFS): + * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs + * + * Copyright (C) 2021, Microsoft, Inc. + * + * Author : Michael Kelley + */ + +#ifndef _ASM_HYPERV_TLFS_H +#define _ASM_HYPERV_TLFS_H + +#include + +/* + * All data structures defined in the TLFS that are shared between Hyper-V + * and a guest VM use Little Endian byte ordering. This matches the default + * byte ordering of Linux running on ARM64, so no special handling is required. + */ + +/* + * These Hyper-V registers provide information equivalent to the CPUID + * instruction on x86/x64. + */ +#define HV_REGISTER_HYPERVISOR_VERSION 0x00000100 /*CPUID 0x40000002 */ +#define HV_REGISTER_FEATURES 0x00000200 /*CPUID 0x40000003 */ +#define HV_REGISTER_ENLIGHTENMENTS 0x00000201 /*CPUID 0x40000004 */ + +/* + * Group C Features. See the asm-generic version of hyperv-tlfs.h + * for a description of Feature Groups. + */ + +/* Crash MSRs available */ +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(8) + +/* STIMER direct mode is available */ +#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(13) + +/* + * Synthetic register definitions equivalent to MSRs on x86/x64 + */ +#define HV_REGISTER_CRASH_P0 0x00000210 +#define HV_REGISTER_CRASH_P1 0x00000211 +#define HV_REGISTER_CRASH_P2 0x00000212 +#define HV_REGISTER_CRASH_P3 0x00000213 +#define HV_REGISTER_CRASH_P4 0x00000214 +#define HV_REGISTER_CRASH_CTL 0x00000215 + +#define HV_REGISTER_GUEST_OSID 0x00090002 +#define HV_REGISTER_VP_INDEX 0x00090003 +#define HV_REGISTER_TIME_REF_COUNT 0x00090004 +#define HV_REGISTER_REFERENCE_TSC 0x00090017 + +#define HV_REGISTER_SINT0 0x000A0000 +#define HV_REGISTER_SCONTROL 0x000A0010 +#define HV_REGISTER_SIEFP 0x000A0012 +#define HV_REGISTER_SIMP 0x000A0013 +#define HV_REGISTER_EOM 0x000A0014 + +#define HV_REGISTER_STIMER0_CONFIG 0x000B0000 +#define HV_REGISTER_STIMER0_COUNT 0x000B0001 + +#include + +#endif diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h new file mode 100644 index 000000000000..20070a847304 --- /dev/null +++ b/arch/arm64/include/asm/mshyperv.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Linux-specific definitions for managing interactions with Microsoft's + * Hyper-V hypervisor. The definitions in this file are specific to + * the ARM64 architecture. See include/asm-generic/mshyperv.h for + * definitions are that architecture independent. + * + * Definitions that are specified in the Hyper-V Top Level Functional + * Spec (TLFS) should not go in this file, but should instead go in + * hyperv-tlfs.h. + * + * Copyright (C) 2021, Microsoft, Inc. + * + * Author : Michael Kelley + */ + +#ifndef _ASM_MSHYPERV_H +#define _ASM_MSHYPERV_H + +#include +#include +#include + +/* + * Declare calls to get and set Hyper-V VP register values on ARM64, which + * requires a hypercall. + */ + +void hv_set_vpreg(u32 reg, u64 value); +u64 hv_get_vpreg(u32 reg); +void hv_get_vpreg_128(u32 reg, struct hv_get_vp_registers_output *result); + +static inline void hv_set_register(unsigned int reg, u64 value) +{ + hv_set_vpreg(reg, value); +} + +static inline u64 hv_get_register(unsigned int reg) +{ + return hv_get_vpreg(reg); +} + +/* SMCCC hypercall parameters */ +#define HV_SMCCC_FUNC_NUMBER 1 +#define HV_FUNC_ID ARM_SMCCC_CALL_VAL( \ + ARM_SMCCC_STD_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + HV_SMCCC_FUNC_NUMBER) + +#include + +#endif -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 4 Aug 2021 08:52:37 -0700 Subject: [PATCH 37/53] arm64: hyperv: Initialize hypervisor on boot Add ARM64-specific code to initialize the Hyper-V hypervisor when booting as a guest VM. This code is built only when CONFIG_HYPERV is enabled. Signed-off-by: Michael Kelley Acked-by: Marc Zyngier Acked-by: Mark Rutland Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1628092359-61351-4-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 9bbb888824e38cc2e9118ed351fe3d22403a73e1) --- arch/arm64/hyperv/Makefile | 2 +- arch/arm64/hyperv/mshyperv.c | 87 ++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/hyperv/mshyperv.c diff --git a/arch/arm64/hyperv/Makefile b/arch/arm64/hyperv/Makefile index 1697d30ff106..87c31c001da9 100644 --- a/arch/arm64/hyperv/Makefile +++ b/arch/arm64/hyperv/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y := hv_core.o +obj-y := hv_core.o mshyperv.o diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c new file mode 100644 index 000000000000..bbbe351e9045 --- /dev/null +++ b/arch/arm64/hyperv/mshyperv.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Core routines for interacting with Microsoft's Hyper-V hypervisor, + * including hypervisor initialization. + * + * Copyright (C) 2021, Microsoft, Inc. + * + * Author : Michael Kelley + */ + +#include +#include +#include +#include +#include +#include +#include + +static bool hyperv_initialized; + +static int __init hyperv_init(void) +{ + struct hv_get_vp_registers_output result; + u32 a, b, c, d; + u64 guest_id; + int ret; + + /* + * Allow for a kernel built with CONFIG_HYPERV to be running in + * a non-Hyper-V environment, including on DT instead of ACPI. + * In such cases, do nothing and return success. + */ + if (acpi_disabled) + return 0; + + if (strncmp((char *)&acpi_gbl_FADT.hypervisor_id, "MsHyperV", 8)) + return 0; + + /* Setup the guest ID */ + guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); + hv_set_vpreg(HV_REGISTER_GUEST_OSID, guest_id); + + /* Get the features and hints from Hyper-V */ + hv_get_vpreg_128(HV_REGISTER_FEATURES, &result); + ms_hyperv.features = result.as32.a; + ms_hyperv.priv_high = result.as32.b; + ms_hyperv.misc_features = result.as32.c; + + hv_get_vpreg_128(HV_REGISTER_ENLIGHTENMENTS, &result); + ms_hyperv.hints = result.as32.a; + + pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n", + ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints, + ms_hyperv.misc_features); + + /* Get information about the Hyper-V host version */ + hv_get_vpreg_128(HV_REGISTER_HYPERVISOR_VERSION, &result); + a = result.as32.a; + b = result.as32.b; + c = result.as32.c; + d = result.as32.d; + pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n", + b >> 16, b & 0xFFFF, a, d & 0xFFFFFF, c, d >> 24); + + ret = hv_common_init(); + if (ret) + return ret; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm64/hyperv_init:online", + hv_common_cpu_init, hv_common_cpu_die); + if (ret < 0) { + hv_common_free(); + return ret; + } + + hyperv_initialized = true; + return 0; +} + +early_initcall(hyperv_init); + +bool hv_is_hyperv_initialized(void) +{ + return hyperv_initialized; +} +EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 4 Aug 2021 08:52:36 -0700 Subject: [PATCH 38/53] arm64: hyperv: Add panic handler Add a function to inform Hyper-V about a guest panic. This code is built only when CONFIG_HYPERV is enabled. Signed-off-by: Michael Kelley Reviewed-by: Sunil Muthuswamy Reviewed-by: Boqun Feng Acked-by: Marc Zyngier Acked-by: Mark Rutland Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1628092359-61351-3-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 512c1117fb2eeb944df1b88bff6e0c002990b369) --- arch/arm64/hyperv/hv_core.c | 52 +++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/arch/arm64/hyperv/hv_core.c b/arch/arm64/hyperv/hv_core.c index 4c5dc0f51b12..b54c34793701 100644 --- a/arch/arm64/hyperv/hv_core.c +++ b/arch/arm64/hyperv/hv_core.c @@ -127,3 +127,55 @@ u64 hv_get_vpreg(u32 msr) return output.as64.low; } EXPORT_SYMBOL_GPL(hv_get_vpreg); + +/* + * hyperv_report_panic - report a panic to Hyper-V. This function uses + * the older version of the Hyper-V interface that admittedly doesn't + * pass enough information to be useful beyond just recording the + * occurrence of a panic. The parallel hv_kmsg_dump() uses the + * new interface that allows reporting 4 Kbytes of data, which is much + * more useful. Hyper-V on ARM64 always supports the newer interface, but + * we retain support for the older version because the sysadmin is allowed + * to disable the newer version via sysctl in case of information security + * concerns about the more verbose version. + */ +void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) +{ + static bool panic_reported; + u64 guest_id; + + /* Don't report a panic to Hyper-V if we're not going to panic */ + if (in_die && !panic_on_oops) + return; + + /* + * We prefer to report panic on 'die' chain as we have proper + * registers to report, but if we miss it (e.g. on BUG()) we need + * to report it on 'panic'. + * + * Calling code in the 'die' and 'panic' paths ensures that only + * one CPU is running this code, so no atomicity is needed. + */ + if (panic_reported) + return; + panic_reported = true; + + guest_id = hv_get_vpreg(HV_REGISTER_GUEST_OSID); + + /* + * Hyper-V provides the ability to store only 5 values. + * Pick the passed in error value, the guest_id, the PC, + * and the SP. + */ + hv_set_vpreg(HV_REGISTER_CRASH_P0, err); + hv_set_vpreg(HV_REGISTER_CRASH_P1, guest_id); + hv_set_vpreg(HV_REGISTER_CRASH_P2, regs->pc); + hv_set_vpreg(HV_REGISTER_CRASH_P3, regs->sp); + hv_set_vpreg(HV_REGISTER_CRASH_P4, 0); + + /* + * Let Hyper-V know there is crash data available + */ + hv_set_vpreg(HV_REGISTER_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); +} +EXPORT_SYMBOL_GPL(hyperv_report_panic); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 4 Aug 2021 08:52:38 -0700 Subject: [PATCH 39/53] arm64: efi: Export screen_info The Hyper-V frame buffer driver may be built as a module, and it needs access to screen_info. So export screen_info. Signed-off-by: Michael Kelley Acked-by: Ard Biesheuvel Acked-by: Marc Zyngier Acked-by: Mark Rutland Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1628092359-61351-5-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 9b16c2132f34316bf0b59d24357a788cc1e9e352) --- arch/arm64/kernel/efi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index fa02efb28e88..e1be6c429810 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -55,6 +55,7 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) /* we will fill this structure from the stub, so don't put it in .bss */ struct screen_info screen_info __section(".data"); +EXPORT_SYMBOL(screen_info); int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 27 Jul 2021 02:06:57 +0800 Subject: [PATCH 40/53] PCI: hv: Turn on the host bridge probing on ARM64 Now we have everything we need, just provide a proper sysdata type for the bus to use on ARM64 and everything else works. Link: https://lore.kernel.org/r/20210726180657.142727-9-boqun.feng@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Lorenzo Pieralisi (cherry picked from commit 88f94c7f8f40d7e26f991f6f6ed914ff44361d75) --- drivers/pci/controller/pci-hyperv.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index f9dc82258fdc..6992e3e89768 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -448,7 +449,11 @@ enum hv_pcibus_state { }; struct hv_pcibus_device { +#ifdef CONFIG_X86 struct pci_sysdata sysdata; +#elif defined(CONFIG_ARM64) + struct pci_config_window sysdata; +#endif struct pci_host_bridge *bridge; /* Protocol version negotiated with the host */ enum pci_protocol_version_t protocol_version; @@ -3076,7 +3081,9 @@ static int hv_pci_probe(struct hv_device *hdev, dom_req, dom); hbus->bridge->domain_nr = dom; +#ifdef CONFIG_X86 hbus->sysdata.domain = dom; +#endif hbus->hdev = hdev; INIT_LIST_HEAD(&hbus->children); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Matheus Castello Date: Wed, 25 Nov 2020 00:29:26 -0300 Subject: [PATCH 41/53] drivers: hv: vmbus: Fix checkpatch SPLIT_STRING Checkpatch emits WARNING: quoted string split across lines. To keep the code clean and with the 80 column length indentation the check and registration code for kmsg_dump_register has been transferred to a new function hv_kmsg_dump_register. Signed-off-by: Matheus Castello Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20201125032926.17002-1-matheus@castello.eng.br Signed-off-by: Wei Liu (cherry picked from commit b0c03eff79a67aa43f17249dd42fac58e96718dc) --- drivers/hv/vmbus_drv.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 4f9a1d12aa88..7551b5c11a98 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1398,6 +1398,24 @@ static struct kmsg_dumper hv_kmsg_dumper = { .dump = hv_kmsg_dump, }; +static void hv_kmsg_dump_register(void) +{ + int ret; + + hv_panic_page = hv_alloc_hyperv_zeroed_page(); + if (!hv_panic_page) { + pr_err("Hyper-V: panic message page memory allocation failed\n"); + return; + } + + ret = kmsg_dump_register(&hv_kmsg_dumper); + if (ret) { + pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret); + hv_free_hyperv_page((unsigned long)hv_panic_page); + hv_panic_page = NULL; + } +} + static struct ctl_table_header *hv_ctl_table_hdr; /* @@ -1488,21 +1506,8 @@ static int vmbus_bus_init(void) * capability is supported by the hypervisor. */ hv_get_crash_ctl(hyperv_crash_ctl); - if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) { - hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page(); - if (hv_panic_page) { - ret = kmsg_dump_register(&hv_kmsg_dumper); - if (ret) { - pr_err("Hyper-V: kmsg dump register " - "error 0x%x\n", ret); - hv_free_hyperv_page( - (unsigned long)hv_panic_page); - hv_panic_page = NULL; - } - } else - pr_err("Hyper-V: panic message page memory " - "allocation failed"); - } + if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) + hv_kmsg_dump_register(); register_die_notifier(&hyperv_die_block); } -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 2 Mar 2021 13:38:15 -0800 Subject: [PATCH 42/53] Drivers: hv: Redo Hyper-V synthetic MSR get/set functions Current code defines a separate get and set macro for each Hyper-V synthetic MSR used by the VMbus driver. Furthermore, the get macro can't be converted to a standard function because the second argument is modified in place, which is somewhat bad form. Redo this by providing a single get and a single set function that take a parameter specifying the MSR to be operated on. Fixup usage of the get function. Calling locations are no more complex than before, but the code under arch/x86 and the upcoming code under arch/arm64 is significantly simplified. Also standardize the names of Hyper-V synthetic MSRs that are architecture neutral. But keep the old x86-specific names as aliases that can be removed later when all references (particularly in KVM code) have been cleaned up in a separate patch series. No functional change. Signed-off-by: Michael Kelley Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/1614721102-2241-4-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit f3c5e63c3690fc64e5a7a2b3e4f9f5ff1fa25584) --- arch/x86/hyperv/hv_init.c | 2 +- arch/x86/include/asm/hyperv-tlfs.h | 102 ++++++++++++++++++----------- arch/x86/include/asm/mshyperv.h | 39 +++-------- drivers/clocksource/hyperv_timer.c | 26 ++++---- drivers/hv/hv.c | 37 ++++++----- drivers/hv/vmbus_drv.c | 2 +- include/asm-generic/mshyperv.h | 2 +- 7 files changed, 110 insertions(+), 100 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index f879eb81b06d..48a5c45c09cb 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -75,7 +75,7 @@ static int hv_cpu_init(unsigned int cpu) *output_arg = page_address(pg + 1); } - hv_get_vp_index(msr_vp_index); + msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); hv_vp_index[smp_processor_id()] = msr_vp_index; diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 119cc587775a..89eb0a885319 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -149,7 +149,7 @@ enum hv_isolation_type { #define HV_X64_MSR_HYPERCALL 0x40000001 /* MSR used to provide vcpu index */ -#define HV_X64_MSR_VP_INDEX 0x40000002 +#define HV_REGISTER_VP_INDEX 0x40000002 /* MSR used to reset the guest OS. */ #define HV_X64_MSR_RESET 0x40000003 @@ -158,10 +158,10 @@ enum hv_isolation_type { #define HV_X64_MSR_VP_RUNTIME 0x40000010 /* MSR used to read the per-partition time reference counter */ -#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +#define HV_REGISTER_TIME_REF_COUNT 0x40000020 /* A partition's reference time stamp counter (TSC) page */ -#define HV_X64_MSR_REFERENCE_TSC 0x40000021 +#define HV_REGISTER_REFERENCE_TSC 0x40000021 /* MSR used to retrieve the TSC frequency */ #define HV_X64_MSR_TSC_FREQUENCY 0x40000022 @@ -176,50 +176,50 @@ enum hv_isolation_type { #define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 /* Define synthetic interrupt controller model specific registers. */ -#define HV_X64_MSR_SCONTROL 0x40000080 -#define HV_X64_MSR_SVERSION 0x40000081 -#define HV_X64_MSR_SIEFP 0x40000082 -#define HV_X64_MSR_SIMP 0x40000083 -#define HV_X64_MSR_EOM 0x40000084 -#define HV_X64_MSR_SINT0 0x40000090 -#define HV_X64_MSR_SINT1 0x40000091 -#define HV_X64_MSR_SINT2 0x40000092 -#define HV_X64_MSR_SINT3 0x40000093 -#define HV_X64_MSR_SINT4 0x40000094 -#define HV_X64_MSR_SINT5 0x40000095 -#define HV_X64_MSR_SINT6 0x40000096 -#define HV_X64_MSR_SINT7 0x40000097 -#define HV_X64_MSR_SINT8 0x40000098 -#define HV_X64_MSR_SINT9 0x40000099 -#define HV_X64_MSR_SINT10 0x4000009A -#define HV_X64_MSR_SINT11 0x4000009B -#define HV_X64_MSR_SINT12 0x4000009C -#define HV_X64_MSR_SINT13 0x4000009D -#define HV_X64_MSR_SINT14 0x4000009E -#define HV_X64_MSR_SINT15 0x4000009F +#define HV_REGISTER_SCONTROL 0x40000080 +#define HV_REGISTER_SVERSION 0x40000081 +#define HV_REGISTER_SIEFP 0x40000082 +#define HV_REGISTER_SIMP 0x40000083 +#define HV_REGISTER_EOM 0x40000084 +#define HV_REGISTER_SINT0 0x40000090 +#define HV_REGISTER_SINT1 0x40000091 +#define HV_REGISTER_SINT2 0x40000092 +#define HV_REGISTER_SINT3 0x40000093 +#define HV_REGISTER_SINT4 0x40000094 +#define HV_REGISTER_SINT5 0x40000095 +#define HV_REGISTER_SINT6 0x40000096 +#define HV_REGISTER_SINT7 0x40000097 +#define HV_REGISTER_SINT8 0x40000098 +#define HV_REGISTER_SINT9 0x40000099 +#define HV_REGISTER_SINT10 0x4000009A +#define HV_REGISTER_SINT11 0x4000009B +#define HV_REGISTER_SINT12 0x4000009C +#define HV_REGISTER_SINT13 0x4000009D +#define HV_REGISTER_SINT14 0x4000009E +#define HV_REGISTER_SINT15 0x4000009F /* * Synthetic Timer MSRs. Four timers per vcpu. */ -#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 -#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 -#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 -#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 -#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 -#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 -#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 -#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 +#define HV_REGISTER_STIMER0_CONFIG 0x400000B0 +#define HV_REGISTER_STIMER0_COUNT 0x400000B1 +#define HV_REGISTER_STIMER1_CONFIG 0x400000B2 +#define HV_REGISTER_STIMER1_COUNT 0x400000B3 +#define HV_REGISTER_STIMER2_CONFIG 0x400000B4 +#define HV_REGISTER_STIMER2_COUNT 0x400000B5 +#define HV_REGISTER_STIMER3_CONFIG 0x400000B6 +#define HV_REGISTER_STIMER3_COUNT 0x400000B7 /* Hyper-V guest idle MSR */ #define HV_X64_MSR_GUEST_IDLE 0x400000F0 /* Hyper-V guest crash notification MSR's */ -#define HV_X64_MSR_CRASH_P0 0x40000100 -#define HV_X64_MSR_CRASH_P1 0x40000101 -#define HV_X64_MSR_CRASH_P2 0x40000102 -#define HV_X64_MSR_CRASH_P3 0x40000103 -#define HV_X64_MSR_CRASH_P4 0x40000104 -#define HV_X64_MSR_CRASH_CTL 0x40000105 +#define HV_REGISTER_CRASH_P0 0x40000100 +#define HV_REGISTER_CRASH_P1 0x40000101 +#define HV_REGISTER_CRASH_P2 0x40000102 +#define HV_REGISTER_CRASH_P3 0x40000103 +#define HV_REGISTER_CRASH_P4 0x40000104 +#define HV_REGISTER_CRASH_CTL 0x40000105 /* TSC emulation after migration */ #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 @@ -229,6 +229,32 @@ enum hv_isolation_type { /* TSC invariant control */ #define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118 +/* Register name aliases for temporary compatibility */ +#define HV_X64_MSR_STIMER0_COUNT HV_REGISTER_STIMER0_COUNT +#define HV_X64_MSR_STIMER0_CONFIG HV_REGISTER_STIMER0_CONFIG +#define HV_X64_MSR_STIMER1_COUNT HV_REGISTER_STIMER1_COUNT +#define HV_X64_MSR_STIMER1_CONFIG HV_REGISTER_STIMER1_CONFIG +#define HV_X64_MSR_STIMER2_COUNT HV_REGISTER_STIMER2_COUNT +#define HV_X64_MSR_STIMER2_CONFIG HV_REGISTER_STIMER2_CONFIG +#define HV_X64_MSR_STIMER3_COUNT HV_REGISTER_STIMER3_COUNT +#define HV_X64_MSR_STIMER3_CONFIG HV_REGISTER_STIMER3_CONFIG +#define HV_X64_MSR_SCONTROL HV_REGISTER_SCONTROL +#define HV_X64_MSR_SVERSION HV_REGISTER_SVERSION +#define HV_X64_MSR_SIMP HV_REGISTER_SIMP +#define HV_X64_MSR_SIEFP HV_REGISTER_SIEFP +#define HV_X64_MSR_VP_INDEX HV_REGISTER_VP_INDEX +#define HV_X64_MSR_EOM HV_REGISTER_EOM +#define HV_X64_MSR_SINT0 HV_REGISTER_SINT0 +#define HV_X64_MSR_SINT15 HV_REGISTER_SINT15 +#define HV_X64_MSR_CRASH_P0 HV_REGISTER_CRASH_P0 +#define HV_X64_MSR_CRASH_P1 HV_REGISTER_CRASH_P1 +#define HV_X64_MSR_CRASH_P2 HV_REGISTER_CRASH_P2 +#define HV_X64_MSR_CRASH_P3 HV_REGISTER_CRASH_P3 +#define HV_X64_MSR_CRASH_P4 HV_REGISTER_CRASH_P4 +#define HV_X64_MSR_CRASH_CTL HV_REGISTER_CRASH_CTL +#define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT +#define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC + /* * Declare the MSR used to setup pages used to communicate with the hypervisor. */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index a34f31dd5d93..ec7928d0120b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -15,41 +15,22 @@ typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, void *data); -#define hv_init_timer(timer, tick) \ - wrmsrl(HV_X64_MSR_STIMER0_COUNT + (2*timer), tick) -#define hv_init_timer_config(timer, val) \ - wrmsrl(HV_X64_MSR_STIMER0_CONFIG + (2*timer), val) - -#define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val) -#define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val) - -#define hv_get_siefp(val) rdmsrl(HV_X64_MSR_SIEFP, val) -#define hv_set_siefp(val) wrmsrl(HV_X64_MSR_SIEFP, val) - -#define hv_get_synic_state(val) rdmsrl(HV_X64_MSR_SCONTROL, val) -#define hv_set_synic_state(val) wrmsrl(HV_X64_MSR_SCONTROL, val) +static inline void hv_set_register(unsigned int reg, u64 value) +{ + wrmsrl(reg, value); +} -#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index) +static inline u64 hv_get_register(unsigned int reg) +{ + u64 value; -#define hv_signal_eom() wrmsrl(HV_X64_MSR_EOM, 0) + rdmsrl(reg, value); + return value; +} -#define hv_get_synint_state(int_num, val) \ - rdmsrl(HV_X64_MSR_SINT0 + int_num, val) -#define hv_set_synint_state(int_num, val) \ - wrmsrl(HV_X64_MSR_SINT0 + int_num, val) #define hv_recommend_using_aeoi() \ (!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)) -#define hv_get_crash_ctl(val) \ - rdmsrl(HV_X64_MSR_CRASH_CTL, val) - -#define hv_get_time_ref_count(val) \ - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val) - -#define hv_get_reference_tsc(val) \ - rdmsrl(HV_X64_MSR_REFERENCE_TSC, val) -#define hv_set_reference_tsc(val) \ - wrmsrl(HV_X64_MSR_REFERENCE_TSC, val) #define hv_set_clocksource_vdso(val) \ ((val).vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK) #define hv_enable_vdso_clocksource() \ diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index c97e1b1e6653..bbda003f8a1c 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -68,14 +68,14 @@ static int hv_ce_set_next_event(unsigned long delta, current_tick = hv_read_reference_counter(); current_tick += delta; - hv_init_timer(0, current_tick); + hv_set_register(HV_REGISTER_STIMER0_COUNT, current_tick); return 0; } static int hv_ce_shutdown(struct clock_event_device *evt) { - hv_init_timer(0, 0); - hv_init_timer_config(0, 0); + hv_set_register(HV_REGISTER_STIMER0_COUNT, 0); + hv_set_register(HV_REGISTER_STIMER0_CONFIG, 0); if (direct_mode_enabled) hv_disable_stimer0_percpu_irq(stimer0_irq); @@ -105,7 +105,7 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt) timer_cfg.direct_mode = 0; timer_cfg.sintx = stimer0_message_sint; } - hv_init_timer_config(0, timer_cfg.as_uint64); + hv_set_register(HV_REGISTER_STIMER0_CONFIG, timer_cfg.as_uint64); return 0; } @@ -323,7 +323,7 @@ static u64 notrace read_hv_clock_tsc(void) u64 current_tick = hv_read_tsc_page(hv_get_tsc_page()); if (current_tick == U64_MAX) - hv_get_time_ref_count(current_tick); + current_tick = hv_get_register(HV_REGISTER_TIME_REF_COUNT); return current_tick; } @@ -344,9 +344,9 @@ static void suspend_hv_clock_tsc(struct clocksource *arg) u64 tsc_msr; /* Disable the TSC page */ - hv_get_reference_tsc(tsc_msr); + tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); tsc_msr &= ~BIT_ULL(0); - hv_set_reference_tsc(tsc_msr); + hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); } @@ -356,10 +356,10 @@ static void resume_hv_clock_tsc(struct clocksource *arg) u64 tsc_msr; /* Re-enable the TSC page */ - hv_get_reference_tsc(tsc_msr); + tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); tsc_msr &= GENMASK_ULL(11, 0); tsc_msr |= BIT_ULL(0) | (u64)phys_addr; - hv_set_reference_tsc(tsc_msr); + hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); } static int hv_cs_enable(struct clocksource *cs) @@ -381,14 +381,12 @@ static struct clocksource hyperv_cs_tsc = { static u64 notrace read_hv_clock_msr(void) { - u64 current_tick; /* * Read the partition counter to get the current tick count. This count * is set to 0 when the partition is created and is incremented in * 100 nanosecond units. */ - hv_get_time_ref_count(current_tick); - return current_tick; + return hv_get_register(HV_REGISTER_TIME_REF_COUNT); } static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg) @@ -466,10 +464,10 @@ static bool __init hv_init_tsc_clocksource(void) * (which already has at least the low 12 bits set to zero since * it is page aligned). Also set the "enable" bit, which is bit 0. */ - hv_get_reference_tsc(tsc_msr); + tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); tsc_msr &= GENMASK_ULL(11, 0); tsc_msr = tsc_msr | 0x1 | (u64)phys_addr; - hv_set_reference_tsc(tsc_msr); + hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); hv_set_clocksource_vdso(hyperv_cs_tsc); clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 3b1ba6c607a1..2fb794d2435d 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -198,34 +198,36 @@ void hv_synic_enable_regs(unsigned int cpu) union hv_synic_scontrol sctrl; /* Setup the Synic's message page */ - hv_get_simp(simp.as_uint64); + simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP); simp.simp_enabled = 1; simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page) >> HV_HYP_PAGE_SHIFT; - hv_set_simp(simp.as_uint64); + hv_set_register(HV_REGISTER_SIMP, simp.as_uint64); /* Setup the Synic's event page */ - hv_get_siefp(siefp.as_uint64); + siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); siefp.siefp_enabled = 1; siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page) >> HV_HYP_PAGE_SHIFT; - hv_set_siefp(siefp.as_uint64); + hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); /* Setup the shared SINT. */ - hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 + + VMBUS_MESSAGE_SINT); shared_sint.vector = hv_get_vector(); shared_sint.masked = false; shared_sint.auto_eoi = hv_recommend_using_aeoi(); - hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, + shared_sint.as_uint64); /* Enable the global synic bit */ - hv_get_synic_state(sctrl.as_uint64); + sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL); sctrl.enable = 1; - hv_set_synic_state(sctrl.as_uint64); + hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); } int hv_synic_init(unsigned int cpu) @@ -247,32 +249,35 @@ void hv_synic_disable_regs(unsigned int cpu) union hv_synic_siefp siefp; union hv_synic_scontrol sctrl; - hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 + + VMBUS_MESSAGE_SINT); shared_sint.masked = 1; /* Need to correctly cleanup in the case of SMP!!! */ /* Disable the interrupt */ - hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, + shared_sint.as_uint64); - hv_get_simp(simp.as_uint64); + simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP); simp.simp_enabled = 0; simp.base_simp_gpa = 0; - hv_set_simp(simp.as_uint64); + hv_set_register(HV_REGISTER_SIMP, simp.as_uint64); - hv_get_siefp(siefp.as_uint64); + siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); siefp.siefp_enabled = 0; siefp.base_siefp_gpa = 0; - hv_set_siefp(siefp.as_uint64); + hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); /* Disable the global synic bit */ - hv_get_synic_state(sctrl.as_uint64); + sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL); sctrl.enable = 0; - hv_set_synic_state(sctrl.as_uint64); + hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); } + int hv_synic_cleanup(unsigned int cpu) { struct vmbus_channel *channel, *sc; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 7551b5c11a98..a5f55839dfaa 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1505,7 +1505,7 @@ static int vmbus_bus_init(void) * Register for panic kmsg callback only if the right * capability is supported by the hypervisor. */ - hv_get_crash_ctl(hyperv_crash_ctl); + hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL); if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) hv_kmsg_dump_register(); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 2c7500e4810f..41c65e434a0d 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -135,7 +135,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) * possibly deliver another msg from the * hypervisor */ - hv_signal_eom(); + hv_set_register(HV_REGISTER_EOM, 0); } } -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 2 Mar 2021 13:38:17 -0800 Subject: [PATCH 43/53] Drivers: hv: vmbus: Handle auto EOI quirk inline On x86/x64, Hyper-V provides a flag to indicate auto EOI functionality, but it doesn't on ARM64. Handle this quirk inline instead of calling into code under arch/x86 (and coming, under arch/arm64). No functional change. Signed-off-by: Michael Kelley Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/1614721102-2241-6-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 946f4b8680b8ad177f6489e023a1d95e82d502e2) --- arch/x86/include/asm/mshyperv.h | 3 --- drivers/hv/hv.c | 12 +++++++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ec7928d0120b..93b3a4fbec52 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -28,9 +28,6 @@ static inline u64 hv_get_register(unsigned int reg) return value; } -#define hv_recommend_using_aeoi() \ - (!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)) - #define hv_set_clocksource_vdso(val) \ ((val).vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK) #define hv_enable_vdso_clocksource() \ diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 2fb794d2435d..08ff1d19824e 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -219,7 +219,17 @@ void hv_synic_enable_regs(unsigned int cpu) shared_sint.vector = hv_get_vector(); shared_sint.masked = false; - shared_sint.auto_eoi = hv_recommend_using_aeoi(); + + /* + * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64), + * it doesn't provide a recommendation flag and AEOI must be disabled. + */ +#ifdef HV_DEPRECATING_AEOI_RECOMMENDED + shared_sint.auto_eoi = + !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED); +#else + shared_sint.auto_eoi = 0; +#endif hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 2 Mar 2021 13:38:19 -0800 Subject: [PATCH 44/53] clocksource/drivers/hyper-v: Handle vDSO differences inline While the driver for the Hyper-V Reference TSC and STIMERs is architecture neutral, vDSO is implemented for x86/x64, but not for ARM64. Current code calls into utility functions under arch/x86 (and coming, under arch/arm64) to handle the difference. Change this approach to handle the difference inline based on whether VDSO_CLOCK_MODE_HVCLOCK is present. The new approach removes code under arch/* since the difference is tied more to the specifics of the Linux implementation than to the architecture. No functional change. Signed-off-by: Michael Kelley Reviewed-by: Boqun Feng Acked-by: Daniel Lezcano Link: https://lore.kernel.org/r/1614721102-2241-8-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit e4ab4658f1cff14c82202132f7af2cb5c2741469) --- arch/x86/include/asm/mshyperv.h | 4 ---- drivers/clocksource/hyperv_timer.c | 10 ++++++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 93b3a4fbec52..1a58715a8399 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -28,10 +28,6 @@ static inline u64 hv_get_register(unsigned int reg) return value; } -#define hv_set_clocksource_vdso(val) \ - ((val).vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK) -#define hv_enable_vdso_clocksource() \ - vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); #define hv_get_raw_timer() rdtsc_ordered() #define hv_get_vector() HYPERVISOR_CALLBACK_VECTOR diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index bbda003f8a1c..7a9030ca68c4 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -362,11 +362,13 @@ static void resume_hv_clock_tsc(struct clocksource *arg) hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); } +#ifdef VDSO_CLOCKMODE_HVCLOCK static int hv_cs_enable(struct clocksource *cs) { - hv_enable_vdso_clocksource(); + vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); return 0; } +#endif static struct clocksource hyperv_cs_tsc = { .name = "hyperv_clocksource_tsc_page", @@ -376,7 +378,12 @@ static struct clocksource hyperv_cs_tsc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, .suspend= suspend_hv_clock_tsc, .resume = resume_hv_clock_tsc, +#ifdef VDSO_CLOCKMODE_HVCLOCK .enable = hv_cs_enable, + .vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK, +#else + .vdso_clock_mode = VDSO_CLOCKMODE_NONE, +#endif }; static u64 notrace read_hv_clock_msr(void) @@ -469,7 +476,6 @@ static bool __init hv_init_tsc_clocksource(void) tsc_msr = tsc_msr | 0x1 | (u64)phys_addr; hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); - hv_set_clocksource_vdso(hyperv_cs_tsc); clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); hv_sched_clock_offset = hv_read_reference_counter(); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 13 May 2021 09:32:46 +0200 Subject: [PATCH 45/53] clocksource/drivers/hyper-v: Re-enable VDSO_CLOCKMODE_HVCLOCK on X86 Mohammed reports (https://bugzilla.kernel.org/show_bug.cgi?id=213029) the commit e4ab4658f1cf ("clocksource/drivers/hyper-v: Handle vDSO differences inline") broke vDSO on x86. The problem appears to be that VDSO_CLOCKMODE_HVCLOCK is an enum value in 'enum vdso_clock_mode' and '#ifdef VDSO_CLOCKMODE_HVCLOCK' branch evaluates to false (it is not a define). Use a dedicated HAVE_VDSO_CLOCKMODE_HVCLOCK define instead. Fixes: e4ab4658f1cf ("clocksource/drivers/hyper-v: Handle vDSO differences inline") Reported-by: Mohammed Gamal Suggested-by: Thomas Gleixner Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210513073246.1715070-1-vkuznets@redhat.com (cherry picked from commit 3486d2c9be652a31033363bdd50391b0c8a8fe21) --- arch/x86/include/asm/vdso/clocksource.h | 2 ++ drivers/clocksource/hyperv_timer.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h index 119ac8612d89..136e5e57cfe1 100644 --- a/arch/x86/include/asm/vdso/clocksource.h +++ b/arch/x86/include/asm/vdso/clocksource.h @@ -7,4 +7,6 @@ VDSO_CLOCKMODE_PVCLOCK, \ VDSO_CLOCKMODE_HVCLOCK +#define HAVE_VDSO_CLOCKMODE_HVCLOCK + #endif /* __ASM_VDSO_CLOCKSOURCE_H */ diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 7a9030ca68c4..1259402569bb 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -362,7 +362,7 @@ static void resume_hv_clock_tsc(struct clocksource *arg) hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); } -#ifdef VDSO_CLOCKMODE_HVCLOCK +#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK static int hv_cs_enable(struct clocksource *cs) { vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); @@ -378,7 +378,7 @@ static struct clocksource hyperv_cs_tsc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, .suspend= suspend_hv_clock_tsc, .resume = resume_hv_clock_tsc, -#ifdef VDSO_CLOCKMODE_HVCLOCK +#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK .enable = hv_cs_enable, .vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK, #else -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 2 Mar 2021 13:38:18 -0800 Subject: [PATCH 46/53] Drivers: hv: vmbus: Move handling of VMbus interrupts VMbus interrupts are most naturally modelled as per-cpu IRQs. But because x86/x64 doesn't have per-cpu IRQs, the core VMbus interrupt handling machinery is done in code under arch/x86 and Linux IRQs are not used. Adding support for ARM64 means adding equivalent code using per-cpu IRQs under arch/arm64. A better model is to treat per-cpu IRQs as the normal path (which it is for modern architectures), and the x86/x64 path as the exception. Do this by incorporating standard Linux per-cpu IRQ allocation into the main VMbus driver, and bypassing it in the x86/x64 exception case. For x86/x64, special case code is retained under arch/x86, but no VMbus interrupt handling code is needed under arch/arm64. No functional change. Signed-off-by: Michael Kelley Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/1614721102-2241-7-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit d608715d4771cf2d63de07a5d7b026b6f52a70a5) --- arch/x86/include/asm/mshyperv.h | 1 - arch/x86/kernel/cpu/mshyperv.c | 13 +++---- drivers/hv/hv.c | 8 ++++- drivers/hv/vmbus_drv.c | 63 +++++++++++++++++++++++++++++---- include/asm-generic/mshyperv.h | 7 ++-- 5 files changed, 70 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 1a58715a8399..3004d8b2821e 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -29,7 +29,6 @@ static inline u64 hv_get_register(unsigned int reg) } #define hv_get_raw_timer() rdtsc_ordered() -#define hv_get_vector() HYPERVISOR_CALLBACK_VECTOR void hyperv_vector_handler(struct pt_regs *regs); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 16a5a901cde3..76c904d2f2d6 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -60,23 +60,18 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback) set_irq_regs(old_regs); } -int hv_setup_vmbus_irq(int irq, void (*handler)(void)) +void hv_setup_vmbus_handler(void (*handler)(void)) { - /* - * The 'irq' argument is ignored on x86/x64 because a hard-coded - * interrupt vector is used for Hyper-V interrupts. - */ vmbus_handler = handler; - return 0; } +EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler); -void hv_remove_vmbus_irq(void) +void hv_remove_vmbus_handler(void) { /* We have no way to deallocate the interrupt gate */ vmbus_handler = NULL; } -EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq); -EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); +EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler); /* * Routines to do per-architecture handling of stimer0 diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 08ff1d19824e..147abe31b540 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include "hyperv_vmbus.h" @@ -214,10 +215,12 @@ void hv_synic_enable_regs(unsigned int cpu) hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); /* Setup the shared SINT. */ + if (vmbus_irq != -1) + enable_percpu_irq(vmbus_irq, 0); shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT); - shared_sint.vector = hv_get_vector(); + shared_sint.vector = vmbus_interrupt; shared_sint.masked = false; /* @@ -285,6 +288,9 @@ void hv_synic_disable_regs(unsigned int cpu) sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL); sctrl.enable = 0; hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); + + if (vmbus_irq != -1) + disable_percpu_irq(vmbus_irq); } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a5f55839dfaa..8efac158ea61 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -48,8 +48,10 @@ static int hyperv_cpuhp_online; static void *hv_panic_page; +static long __percpu *vmbus_evt; + /* Values parsed from ACPI DSDT */ -static int vmbus_irq; +int vmbus_irq; int vmbus_interrupt; /* @@ -1351,7 +1353,13 @@ static void vmbus_isr(void) tasklet_schedule(&hv_cpu->msg_dpc); } - add_interrupt_randomness(hv_get_vector(), 0); + add_interrupt_randomness(vmbus_interrupt, 0); +} + +static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id) +{ + vmbus_isr(); + return IRQ_HANDLED; } /* @@ -1466,9 +1474,28 @@ static int vmbus_bus_init(void) if (ret) return ret; - ret = hv_setup_vmbus_irq(vmbus_irq, vmbus_isr); - if (ret) - goto err_setup; + /* + * VMbus interrupts are best modeled as per-cpu interrupts. If + * on an architecture with support for per-cpu IRQs (e.g. ARM64), + * allocate a per-cpu IRQ using standard Linux kernel functionality. + * If not on such an architecture (e.g., x86/x64), then rely on + * code in the arch-specific portion of the code tree to connect + * the VMbus interrupt handler. + */ + + if (vmbus_irq == -1) { + hv_setup_vmbus_handler(vmbus_isr); + } else { + vmbus_evt = alloc_percpu(long); + ret = request_percpu_irq(vmbus_irq, vmbus_percpu_isr, + "Hyper-V VMbus", vmbus_evt); + if (ret) { + pr_err("Can't request Hyper-V VMbus IRQ %d, Err %d", + vmbus_irq, ret); + free_percpu(vmbus_evt); + goto err_setup; + } + } ret = hv_synic_alloc(); if (ret) @@ -1529,7 +1556,12 @@ static int vmbus_bus_init(void) err_cpuhp: hv_synic_free(); err_alloc: - hv_remove_vmbus_irq(); + if (vmbus_irq == -1) { + hv_remove_vmbus_handler(); + } else { + free_percpu_irq(vmbus_irq, vmbus_evt); + free_percpu(vmbus_evt); + } err_setup: bus_unregister(&hv_bus); unregister_sysctl_table(hv_ctl_table_hdr); @@ -2644,6 +2676,18 @@ static int __init hv_acpi_init(void) ret = -ETIMEDOUT; goto cleanup; } + + /* + * If we're on an architecture with a hardcoded hypervisor + * vector (i.e. x86/x64), override the VMbus interrupt found + * in the ACPI tables. Ensure vmbus_irq is not set since the + * normal Linux IRQ mechanism is not used in this case. + */ +#ifdef HYPERVISOR_CALLBACK_VECTOR + vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR; + vmbus_irq = -1; +#endif + hv_debug_init(); ret = vmbus_bus_init(); @@ -2674,7 +2718,12 @@ static void __exit vmbus_exit(void) vmbus_connection.conn_state = DISCONNECTED; hv_stimer_global_cleanup(); vmbus_disconnect(); - hv_remove_vmbus_irq(); + if (vmbus_irq == -1) { + hv_remove_vmbus_handler(); + } else { + free_percpu_irq(vmbus_irq, vmbus_evt); + free_percpu(vmbus_evt); + } for_each_online_cpu(cpu) { struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 41c65e434a0d..f9cde867c892 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -139,10 +139,8 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) } } -int hv_setup_vmbus_irq(int irq, void (*handler)(void)); -void hv_remove_vmbus_irq(void); -void hv_enable_vmbus_irq(void); -void hv_disable_vmbus_irq(void); +void hv_setup_vmbus_handler(void (*handler)(void)); +void hv_remove_vmbus_handler(void); void hv_setup_kexec_handler(void (*handler)(void)); void hv_remove_kexec_handler(void); @@ -150,6 +148,7 @@ void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); void hv_remove_crash_handler(void); extern int vmbus_interrupt; +extern int vmbus_irq; #if IS_ENABLED(CONFIG_HYPERV) /* -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 2 Mar 2021 13:38:22 -0800 Subject: [PATCH 47/53] clocksource/drivers/hyper-v: Move handling of STIMER0 interrupts STIMER0 interrupts are most naturally modeled as per-cpu IRQs. But because x86/x64 doesn't have per-cpu IRQs, the core STIMER0 interrupt handling machinery is done in code under arch/x86 and Linux IRQs are not used. Adding support for ARM64 means adding equivalent code using per-cpu IRQs under arch/arm64. A better model is to treat per-cpu IRQs as the normal path (which it is for modern architectures), and the x86/x64 path as the exception. Do this by incorporating standard Linux per-cpu IRQ allocation into the main SITMER0 driver code, and bypass it in the x86/x64 exception case. For x86/x64, special case code is retained under arch/x86, but no STIMER0 interrupt handling code is needed under arch/arm64. No functional change. Signed-off-by: Michael Kelley Acked-by: Daniel Lezcano Link: https://lore.kernel.org/r/1614721102-2241-11-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit ec866be6ec547c9e1cc4451f04250e08b5fe67c7) --- arch/x86/hyperv/hv_init.c | 2 +- arch/x86/include/asm/mshyperv.h | 4 - arch/x86/kernel/cpu/mshyperv.c | 10 +- drivers/clocksource/hyperv_timer.c | 168 ++++++++++++++++++++--------- include/asm-generic/mshyperv.h | 5 - include/clocksource/hyperv_timer.h | 3 +- 6 files changed, 120 insertions(+), 72 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 48a5c45c09cb..64b6ebbb1a06 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -327,7 +327,7 @@ static void __init hv_stimer_setup_percpu_clockev(void) * Ignore any errors in setting up stimer clockevents * as we can run with the LAPIC timer as a fallback. */ - (void)hv_stimer_alloc(); + (void)hv_stimer_alloc(false); /* * Still register the LAPIC timer, because the direct-mode STIMER is diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 3004d8b2821e..67ff0d637e55 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -32,10 +32,6 @@ static inline u64 hv_get_register(unsigned int reg) void hyperv_vector_handler(struct pt_regs *regs); -static inline void hv_enable_stimer0_percpu_irq(int irq) {} -static inline void hv_disable_stimer0_percpu_irq(int irq) {} - - #if IS_ENABLED(CONFIG_HYPERV) extern int hyperv_init_cpuhp; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 76c904d2f2d6..9ddc74e475ca 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -90,21 +90,17 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) set_irq_regs(old_regs); } -int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)) +/* For x86/x64, override weak placeholders in hyperv_timer.c */ +void hv_setup_stimer0_handler(void (*handler)(void)) { - *vector = HYPERV_STIMER0_VECTOR; - *irq = -1; /* Unused on x86/x64 */ hv_stimer0_handler = handler; - return 0; } -EXPORT_SYMBOL_GPL(hv_setup_stimer0_irq); -void hv_remove_stimer0_irq(int irq) +void hv_remove_stimer0_handler(void) { /* We have no way to deallocate the interrupt gate */ hv_stimer0_handler = NULL; } -EXPORT_SYMBOL_GPL(hv_remove_stimer0_irq); void hv_setup_kexec_handler(void (*handler)(void)) { diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 1259402569bb..150a3f7174a9 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -18,6 +18,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -43,14 +46,13 @@ static u64 hv_sched_clock_offset __ro_after_init; */ static bool direct_mode_enabled; -static int stimer0_irq; -static int stimer0_vector; +static int stimer0_irq = -1; static int stimer0_message_sint; +static DEFINE_PER_CPU(long, stimer0_evt); /* - * ISR for when stimer0 is operating in Direct Mode. Direct Mode - * does not use VMbus or any VMbus messages, so process here and not - * in the VMbus driver code. + * Common code for stimer0 interrupts coming via Direct Mode or + * as a VMbus message. */ void hv_stimer0_isr(void) { @@ -61,6 +63,16 @@ void hv_stimer0_isr(void) } EXPORT_SYMBOL_GPL(hv_stimer0_isr); +/* + * stimer0 interrupt handler for architectures that support + * per-cpu interrupts, which also implies Direct Mode. + */ +static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id) +{ + hv_stimer0_isr(); + return IRQ_HANDLED; +} + static int hv_ce_set_next_event(unsigned long delta, struct clock_event_device *evt) { @@ -76,8 +88,8 @@ static int hv_ce_shutdown(struct clock_event_device *evt) { hv_set_register(HV_REGISTER_STIMER0_COUNT, 0); hv_set_register(HV_REGISTER_STIMER0_CONFIG, 0); - if (direct_mode_enabled) - hv_disable_stimer0_percpu_irq(stimer0_irq); + if (direct_mode_enabled && stimer0_irq >= 0) + disable_percpu_irq(stimer0_irq); return 0; } @@ -95,8 +107,9 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt) * on the specified hardware vector/IRQ. */ timer_cfg.direct_mode = 1; - timer_cfg.apic_vector = stimer0_vector; - hv_enable_stimer0_percpu_irq(stimer0_irq); + timer_cfg.apic_vector = HYPERV_STIMER0_VECTOR; + if (stimer0_irq >= 0) + enable_percpu_irq(stimer0_irq, IRQ_TYPE_NONE); } else { /* * When it expires, the timer will generate a VMbus message, @@ -169,10 +182,58 @@ int hv_stimer_cleanup(unsigned int cpu) } EXPORT_SYMBOL_GPL(hv_stimer_cleanup); +/* + * These placeholders are overridden by arch specific code on + * architectures that need special setup of the stimer0 IRQ because + * they don't support per-cpu IRQs (such as x86/x64). + */ +void __weak hv_setup_stimer0_handler(void (*handler)(void)) +{ +}; + +void __weak hv_remove_stimer0_handler(void) +{ +}; + +/* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */ +static int hv_setup_stimer0_irq(void) +{ + int ret; + + ret = acpi_register_gsi(NULL, HYPERV_STIMER0_VECTOR, + ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH); + if (ret < 0) { + pr_err("Can't register Hyper-V stimer0 GSI. Error %d", ret); + return ret; + } + stimer0_irq = ret; + + ret = request_percpu_irq(stimer0_irq, hv_stimer0_percpu_isr, + "Hyper-V stimer0", &stimer0_evt); + if (ret) { + pr_err("Can't request Hyper-V stimer0 IRQ %d. Error %d", + stimer0_irq, ret); + acpi_unregister_gsi(stimer0_irq); + stimer0_irq = -1; + } + return ret; +} + +static void hv_remove_stimer0_irq(void) +{ + if (stimer0_irq == -1) { + hv_remove_stimer0_handler(); + } else { + free_percpu_irq(stimer0_irq, &stimer0_evt); + acpi_unregister_gsi(stimer0_irq); + stimer0_irq = -1; + } +} + /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */ -int hv_stimer_alloc(void) +int hv_stimer_alloc(bool have_percpu_irqs) { - int ret = 0; + int ret; /* * Synthetic timers are always available except on old versions of @@ -188,29 +249,37 @@ int hv_stimer_alloc(void) direct_mode_enabled = ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE; - if (direct_mode_enabled) { - ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector, - hv_stimer0_isr); + + /* + * If Direct Mode isn't enabled, the remainder of the initialization + * is done later by hv_stimer_legacy_init() + */ + if (!direct_mode_enabled) + return 0; + + if (have_percpu_irqs) { + ret = hv_setup_stimer0_irq(); if (ret) - goto free_percpu; + goto free_clock_event; + } else { + hv_setup_stimer0_handler(hv_stimer0_isr); + } - /* - * Since we are in Direct Mode, stimer initialization - * can be done now with a CPUHP value in the same range - * as other clockevent devices. - */ - ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING, - "clockevents/hyperv/stimer:starting", - hv_stimer_init, hv_stimer_cleanup); - if (ret < 0) - goto free_stimer0_irq; + /* + * Since we are in Direct Mode, stimer initialization + * can be done now with a CPUHP value in the same range + * as other clockevent devices. + */ + ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING, + "clockevents/hyperv/stimer:starting", + hv_stimer_init, hv_stimer_cleanup); + if (ret < 0) { + hv_remove_stimer0_irq(); + goto free_clock_event; } return ret; -free_stimer0_irq: - hv_remove_stimer0_irq(stimer0_irq); - stimer0_irq = 0; -free_percpu: +free_clock_event: free_percpu(hv_clock_event); hv_clock_event = NULL; return ret; @@ -254,23 +323,6 @@ void hv_stimer_legacy_cleanup(unsigned int cpu) } EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup); - -/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */ -void hv_stimer_free(void) -{ - if (!hv_clock_event) - return; - - if (direct_mode_enabled) { - cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING); - hv_remove_stimer0_irq(stimer0_irq); - stimer0_irq = 0; - } - free_percpu(hv_clock_event); - hv_clock_event = NULL; -} -EXPORT_SYMBOL_GPL(hv_stimer_free); - /* * Do a global cleanup of clockevents for the cases of kexec and * vmbus exit @@ -287,12 +339,17 @@ void hv_stimer_global_cleanup(void) hv_stimer_legacy_cleanup(cpu); } - /* - * If Direct Mode is enabled, the cpuhp teardown callback - * (hv_stimer_cleanup) will be run on all CPUs to stop the - * stimers. - */ - hv_stimer_free(); + if (!hv_clock_event) + return; + + if (direct_mode_enabled) { + cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING); + hv_remove_stimer0_irq(); + stimer0_irq = -1; + } + free_percpu(hv_clock_event); + hv_clock_event = NULL; + } EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); @@ -457,9 +514,14 @@ static bool __init hv_init_tsc_clocksource(void) * Hyper-V Reference TSC rating, causing the generic TSC to be used. * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference * TSC will be preferred over the virtualized ARM64 arch counter. + * While the Hyper-V MSR clocksource won't be used since the + * Reference TSC clocksource is present, change its rating as + * well for consistency. */ - if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) + if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { hyperv_cs_tsc.rating = 250; + hyperv_cs_msr.rating = 250; + } hv_read_reference_counter = read_hv_clock_tsc; phys_addr = virt_to_phys(hv_get_tsc_page()); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index f9cde867c892..e13f137a0ba8 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -231,9 +231,4 @@ static inline bool hv_is_hibernation_supported(void) { return false; } static inline void hyperv_cleanup(void) {} #endif /* CONFIG_HYPERV */ -#if IS_ENABLED(CONFIG_HYPERV) -extern int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)); -extern void hv_remove_stimer0_irq(int irq); -#endif - #endif diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h index 34eef083c988..b6774aa5a4b8 100644 --- a/include/clocksource/hyperv_timer.h +++ b/include/clocksource/hyperv_timer.h @@ -21,8 +21,7 @@ #define HV_MIN_DELTA_TICKS 1 /* Routines called by the VMbus driver */ -extern int hv_stimer_alloc(void); -extern void hv_stimer_free(void); +extern int hv_stimer_alloc(bool have_percpu_irqs); extern int hv_stimer_cleanup(unsigned int cpu); extern void hv_stimer_legacy_init(unsigned int cpu, int sint); extern void hv_stimer_legacy_cleanup(unsigned int cpu); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 14 Jul 2021 11:34:45 -0700 Subject: [PATCH 48/53] Drivers: hv: Make portions of Hyper-V init code be arch neutral The code to allocate and initialize the hv_vp_index array is architecture neutral. Similarly, the code to allocate and populate the hypercall input and output arg pages is architecture neutral. Move both sets of code out from arch/x86 and into utility functions in drivers/hv/hv_common.c that can be shared by Hyper-V initialization on ARM64. No functional changes. However, the allocation of the hypercall input and output arg pages is done differently so that the size is always the Hyper-V page size, even if not the same as the guest page size (such as with ARM64's 64K page size). Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1626287687-2045-2-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit afca4d95dd7d7936d46a0ff02169cc40f534a6a3) --- arch/x86/hyperv/hv_init.c | 91 +++------------------ arch/x86/include/asm/mshyperv.h | 4 - arch/x86/kernel/cpu/mshyperv.c | 3 - drivers/hv/hv_common.c | 138 ++++++++++++++++++++++++++++++++ include/asm-generic/mshyperv.h | 10 +++ 5 files changed, 158 insertions(+), 88 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 64b6ebbb1a06..8e4c05ba541f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -39,48 +39,17 @@ EXPORT_SYMBOL_GPL(hv_hypercall_pg); /* Storage to save the hypercall page temporarily for hibernation */ static void *hv_hypercall_pg_saved; -u32 *hv_vp_index; -EXPORT_SYMBOL_GPL(hv_vp_index); - struct hv_vp_assist_page **hv_vp_assist_page; EXPORT_SYMBOL_GPL(hv_vp_assist_page); -void __percpu **hyperv_pcpu_input_arg; -EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); - -void __percpu **hyperv_pcpu_output_arg; -EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); - -u32 hv_max_vp_index; -EXPORT_SYMBOL_GPL(hv_max_vp_index); - static int hv_cpu_init(unsigned int cpu) { - u64 msr_vp_index; struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; - void **input_arg; - struct page *pg; - - /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ - pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ? 1 : 0); - if (unlikely(!pg)) - return -ENOMEM; - - input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - *input_arg = page_address(pg); - if (hv_root_partition) { - void **output_arg; - - output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); - *output_arg = page_address(pg + 1); - } - - msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); - - hv_vp_index[smp_processor_id()] = msr_vp_index; + int ret; - if (msr_vp_index > hv_max_vp_index) - hv_max_vp_index = msr_vp_index; + ret = hv_common_cpu_init(cpu); + if (ret) + return ret; if (!hv_vp_assist_page) return 0; @@ -198,25 +167,8 @@ static int hv_cpu_die(unsigned int cpu) { struct hv_reenlightenment_control re_ctrl; unsigned int new_cpu; - unsigned long flags; - void **input_arg; - void *pg; - local_irq_save(flags); - input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - pg = *input_arg; - *input_arg = NULL; - - if (hv_root_partition) { - void **output_arg; - - output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); - *output_arg = NULL; - } - - local_irq_restore(flags); - - free_pages((unsigned long)pg, hv_root_partition ? 1 : 0); + hv_common_cpu_die(cpu); if (hv_vp_assist_page && hv_vp_assist_page[cpu]) wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); @@ -368,7 +320,7 @@ void __init hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; - int cpuhp, i; + int cpuhp; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; @@ -380,36 +332,14 @@ void __init hyperv_init(void) if ((ms_hyperv.features & required_msrs) != required_msrs) return; - /* - * Allocate the per-CPU state for the hypercall input arg. - * If this allocation fails, we will not be able to setup - * (per-CPU) hypercall input page and thus this failure is - * fatal on Hyper-V. - */ - hyperv_pcpu_input_arg = alloc_percpu(void *); - - BUG_ON(hyperv_pcpu_input_arg == NULL); - - /* Allocate the per-CPU state for output arg for root */ - if (hv_root_partition) { - hyperv_pcpu_output_arg = alloc_percpu(void *); - BUG_ON(hyperv_pcpu_output_arg == NULL); - } - - /* Allocate percpu VP index */ - hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), - GFP_KERNEL); - if (!hv_vp_index) + if (hv_common_init()) return; - for (i = 0; i < num_possible_cpus(); i++) - hv_vp_index[i] = VP_INVAL; - hv_vp_assist_page = kcalloc(num_possible_cpus(), sizeof(*hv_vp_assist_page), GFP_KERNEL); if (!hv_vp_assist_page) { ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; - goto free_vp_index; + goto common_free; } cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", @@ -507,9 +437,8 @@ void __init hyperv_init(void) free_vp_assist_page: kfree(hv_vp_assist_page); hv_vp_assist_page = NULL; -free_vp_index: - kfree(hv_vp_index); - hv_vp_index = NULL; +common_free: + hv_common_free(); } /* diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 67ff0d637e55..adccbc209169 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -36,8 +36,6 @@ void hyperv_vector_handler(struct pt_regs *regs); extern int hyperv_init_cpuhp; extern void *hv_hypercall_pg; -extern void __percpu **hyperv_pcpu_input_arg; -extern void __percpu **hyperv_pcpu_output_arg; extern u64 hv_current_partition_id; @@ -170,8 +168,6 @@ int hyperv_fill_flush_guest_mapping_list( struct hv_guest_mapping_flush_list *flush, u64 start_gfn, u64 end_gfn); -extern bool hv_root_partition; - #ifdef CONFIG_X86_64 void hv_apic_init(void); void __init hv_init_spinlocks(void); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 9ddc74e475ca..79223a0c863e 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -35,10 +35,7 @@ /* Is Linux running as the root partition? */ bool hv_root_partition; -EXPORT_SYMBOL_GPL(hv_root_partition); - struct ms_hyperv_info ms_hyperv; -EXPORT_SYMBOL_GPL(ms_hyperv); #if IS_ENABLED(CONFIG_HYPERV) static void (*vmbus_handler)(void); diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index f0053c786891..caba4f728987 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -15,9 +15,147 @@ #include #include #include +#include +#include #include #include +/* + * hv_root_partition and ms_hyperv are defined here with other Hyper-V + * specific globals so they are shared across all architectures and are + * built only when CONFIG_HYPERV is defined. But on x86, + * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not + * defined, and it uses these two variables. So mark them as __weak + * here, allowing for an overriding definition in the module containing + * ms_hyperv_init_platform(). + */ +bool __weak hv_root_partition; +EXPORT_SYMBOL_GPL(hv_root_partition); + +struct ms_hyperv_info __weak ms_hyperv; +EXPORT_SYMBOL_GPL(ms_hyperv); + +u32 *hv_vp_index; +EXPORT_SYMBOL_GPL(hv_vp_index); + +u32 hv_max_vp_index; +EXPORT_SYMBOL_GPL(hv_max_vp_index); + +void __percpu **hyperv_pcpu_input_arg; +EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); + +void __percpu **hyperv_pcpu_output_arg; +EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); + +/* + * Hyper-V specific initialization and shutdown code that is + * common across all architectures. Called from architecture + * specific initialization functions. + */ + +void __init hv_common_free(void) +{ + kfree(hv_vp_index); + hv_vp_index = NULL; + + free_percpu(hyperv_pcpu_output_arg); + hyperv_pcpu_output_arg = NULL; + + free_percpu(hyperv_pcpu_input_arg); + hyperv_pcpu_input_arg = NULL; +} + +int __init hv_common_init(void) +{ + int i; + + /* + * Allocate the per-CPU state for the hypercall input arg. + * If this allocation fails, we will not be able to setup + * (per-CPU) hypercall input page and thus this failure is + * fatal on Hyper-V. + */ + hyperv_pcpu_input_arg = alloc_percpu(void *); + BUG_ON(!hyperv_pcpu_input_arg); + + /* Allocate the per-CPU state for output arg for root */ + if (hv_root_partition) { + hyperv_pcpu_output_arg = alloc_percpu(void *); + BUG_ON(!hyperv_pcpu_output_arg); + } + + hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), + GFP_KERNEL); + if (!hv_vp_index) { + hv_common_free(); + return -ENOMEM; + } + + for (i = 0; i < num_possible_cpus(); i++) + hv_vp_index[i] = VP_INVAL; + + return 0; +} + +/* + * Hyper-V specific initialization and die code for + * individual CPUs that is common across all architectures. + * Called by the CPU hotplug mechanism. + */ + +int hv_common_cpu_init(unsigned int cpu) +{ + void **inputarg, **outputarg; + u64 msr_vp_index; + gfp_t flags; + int pgcount = hv_root_partition ? 2 : 1; + + /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ + flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL; + + inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags); + if (!(*inputarg)) + return -ENOMEM; + + if (hv_root_partition) { + outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); + *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE; + } + + msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); + + hv_vp_index[cpu] = msr_vp_index; + + if (msr_vp_index > hv_max_vp_index) + hv_max_vp_index = msr_vp_index; + + return 0; +} + +int hv_common_cpu_die(unsigned int cpu) +{ + unsigned long flags; + void **inputarg, **outputarg; + void *mem; + + local_irq_save(flags); + + inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + mem = *inputarg; + *inputarg = NULL; + + if (hv_root_partition) { + outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); + *outputarg = NULL; + } + + local_irq_restore(flags); + + kfree(mem); + + return 0; +} /* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */ bool hv_query_ext_cap(u64 cap_query) diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index e13f137a0ba8..977166ff8672 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -39,6 +39,9 @@ struct ms_hyperv_info { }; extern struct ms_hyperv_info ms_hyperv; +extern void __percpu **hyperv_pcpu_input_arg; +extern void __percpu **hyperv_pcpu_output_arg; + extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); @@ -150,6 +153,8 @@ void hv_remove_crash_handler(void); extern int vmbus_interrupt; extern int vmbus_irq; +extern bool hv_root_partition; + #if IS_ENABLED(CONFIG_HYPERV) /* * Hypervisor's notion of virtual processor ID is different from @@ -163,6 +168,11 @@ extern u32 hv_max_vp_index; /* Sentinel value for an uninitialized entry in hv_vp_index array */ #define VP_INVAL U32_MAX +int __init hv_common_init(void); +void __init hv_common_free(void); +int hv_common_cpu_init(unsigned int cpu); +int hv_common_cpu_die(unsigned int cpu); + void *hv_alloc_hyperv_page(void); void *hv_alloc_hyperv_zeroed_page(void); void hv_free_hyperv_page(unsigned long addr); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 14 Jul 2021 11:34:46 -0700 Subject: [PATCH 49/53] Drivers: hv: Add arch independent default functions for some Hyper-V handlers Architecture independent Hyper-V code calls various arch-specific handlers when needed. To aid in supporting multiple architectures, provide weak defaults that can be overridden by arch-specific implementations where appropriate. But when arch-specific overrides aren't needed or haven't been implemented yet for a particular architecture, these stubs reduce the amount of clutter under arch/. No functional change. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1626287687-2045-3-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 9d7cf2c9675838c12cd5cf5a4ebe2ba41bd78a44) --- arch/x86/hyperv/hv_init.c | 2 -- arch/x86/kernel/cpu/mshyperv.c | 6 ----- drivers/hv/hv_common.c | 49 ++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 8 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 8e4c05ba541f..0ec2222e35fe 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -468,7 +468,6 @@ void hyperv_cleanup(void) hypercall_msr.as_uint64 = 0; wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); } -EXPORT_SYMBOL_GPL(hyperv_cleanup); void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) { @@ -542,4 +541,3 @@ bool hv_is_isolation_supported(void) { return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; } -EXPORT_SYMBOL_GPL(hv_is_isolation_supported); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 79223a0c863e..617b7f35cc8f 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -61,14 +61,12 @@ void hv_setup_vmbus_handler(void (*handler)(void)) { vmbus_handler = handler; } -EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler); void hv_remove_vmbus_handler(void) { /* We have no way to deallocate the interrupt gate */ vmbus_handler = NULL; } -EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler); /* * Routines to do per-architecture handling of stimer0 @@ -103,25 +101,21 @@ void hv_setup_kexec_handler(void (*handler)(void)) { hv_kexec_handler = handler; } -EXPORT_SYMBOL_GPL(hv_setup_kexec_handler); void hv_remove_kexec_handler(void) { hv_kexec_handler = NULL; } -EXPORT_SYMBOL_GPL(hv_remove_kexec_handler); void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)) { hv_crash_handler = handler; } -EXPORT_SYMBOL_GPL(hv_setup_crash_handler); void hv_remove_crash_handler(void) { hv_crash_handler = NULL; } -EXPORT_SYMBOL_GPL(hv_remove_crash_handler); #ifdef CONFIG_KEXEC_CORE static void hv_machine_shutdown(void) diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index caba4f728987..cac9c6bfc653 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -202,3 +203,51 @@ bool hv_query_ext_cap(u64 cap_query) return hv_extended_cap & cap_query; } EXPORT_SYMBOL_GPL(hv_query_ext_cap); + +/* These __weak functions provide default "no-op" behavior and + * may be overridden by architecture specific versions. Architectures + * for which the default "no-op" behavior is sufficient can leave + * them unimplemented and not be cluttered with a bunch of stub + * functions in arch-specific code. + */ + +bool __weak hv_is_isolation_supported(void) +{ + return false; +} +EXPORT_SYMBOL_GPL(hv_is_isolation_supported); + +void __weak hv_setup_vmbus_handler(void (*handler)(void)) +{ +} +EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler); + +void __weak hv_remove_vmbus_handler(void) +{ +} +EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler); + +void __weak hv_setup_kexec_handler(void (*handler)(void)) +{ +} +EXPORT_SYMBOL_GPL(hv_setup_kexec_handler); + +void __weak hv_remove_kexec_handler(void) +{ +} +EXPORT_SYMBOL_GPL(hv_remove_kexec_handler); + +void __weak hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)) +{ +} +EXPORT_SYMBOL_GPL(hv_setup_crash_handler); + +void __weak hv_remove_crash_handler(void) +{ +} +EXPORT_SYMBOL_GPL(hv_remove_crash_handler); + +void __weak hyperv_cleanup(void) +{ +} +EXPORT_SYMBOL_GPL(hyperv_cleanup); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 27 Jul 2021 02:06:56 +0800 Subject: [PATCH 50/53] PCI: hv: Set up MSI domain at bridge probing time Since PCI_HYPERV depends on PCI_MSI_IRQ_DOMAIN which selects GENERIC_MSI_IRQ_DOMAIN, we can use dev_set_msi_domain() to set up the MSI domain at probing time, and this works for both x86 and ARM64. Therefore use it as the preparation for ARM64 Hyper-V PCI support. As a result, no longer need to maintain ->fwnode in x86 specific pci_sysdata, and make hv_pcibus_device own it instead. Link: https://lore.kernel.org/r/20210726180657.142727-8-boqun.feng@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Lorenzo Pieralisi (cherry picked from commit 9e7f9178ab4943b3a7294a12bc38925c515ca3f0) --- drivers/pci/controller/pci-hyperv.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 6992e3e89768..f6ee9f359a75 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -455,6 +455,7 @@ struct hv_pcibus_device { struct pci_config_window sysdata; #endif struct pci_host_bridge *bridge; + struct fwnode_handle *fwnode; /* Protocol version negotiated with the host */ enum pci_protocol_version_t protocol_version; enum hv_pcibus_state state; @@ -1571,7 +1572,7 @@ static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus) hbus->msi_info.handler = handle_edge_irq; hbus->msi_info.handler_name = "edge"; hbus->msi_info.data = hbus; - hbus->irq_domain = pci_msi_create_irq_domain(hbus->sysdata.fwnode, + hbus->irq_domain = pci_msi_create_irq_domain(hbus->fwnode, &hbus->msi_info, x86_vector_domain); if (!hbus->irq_domain) { @@ -1580,6 +1581,8 @@ static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus) return -ENODEV; } + dev_set_msi_domain(&hbus->bridge->dev, hbus->irq_domain); + return 0; } @@ -3129,9 +3132,9 @@ static int hv_pci_probe(struct hv_device *hdev, goto unmap; } - hbus->sysdata.fwnode = irq_domain_alloc_named_fwnode(name); + hbus->fwnode = irq_domain_alloc_named_fwnode(name); kfree(name); - if (!hbus->sysdata.fwnode) { + if (!hbus->fwnode) { ret = -ENOMEM; goto unmap; } @@ -3209,7 +3212,7 @@ static int hv_pci_probe(struct hv_device *hdev, free_irq_domain: irq_domain_remove(hbus->irq_domain); free_fwnode: - irq_domain_free_fwnode(hbus->sysdata.fwnode); + irq_domain_free_fwnode(hbus->fwnode); unmap: iounmap(hbus->cfg_addr); free_config: @@ -3332,7 +3335,7 @@ static int hv_pci_remove(struct hv_device *hdev) hv_free_config_window(hbus); hv_pci_free_bridge_windows(hbus); irq_domain_remove(hbus->irq_domain); - irq_domain_free_fwnode(hbus->sysdata.fwnode); + irq_domain_free_fwnode(hbus->fwnode); hv_put_dom_num(hbus->bridge->domain_nr); -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 14 Jul 2021 11:34:47 -0700 Subject: [PATCH 51/53] Drivers: hv: Move Hyper-V misc functionality to arch-neutral code The check for whether hibernation is possible, and the enabling of Hyper-V panic notification during kexec, are both architecture neutral. Move the code from under arch/x86 and into drivers/hv/hv_common.c where it can also be used for ARM64. No functional change. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1626287687-2045-4-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 6dc77fa5ac2cf26f846a51492dbe42526e26d0f2) --- arch/x86/hyperv/hv_init.c | 8 +------- arch/x86/kernel/cpu/mshyperv.c | 10 ---------- drivers/hv/hv_common.c | 18 ++++++++++++++++++ 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 0ec2222e35fe..5f2dae6c33d5 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -7,10 +7,10 @@ * Author : K. Y. Srinivasan */ -#include #include #include #include +#include #include #include #include @@ -523,12 +523,6 @@ bool hv_is_hyperv_initialized(void) } EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); -bool hv_is_hibernation_supported(void) -{ - return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); -} -EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); - enum hv_isolation_type hv_get_isolation_type(void) { if (!(ms_hyperv.priv_high & HV_ISOLATION)) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 617b7f35cc8f..c91c4c1aa2c8 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -321,16 +321,6 @@ static void __init ms_hyperv_init_platform(void) cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); } - /* - * Hyper-V expects to get crash register data or kmsg when - * crash enlightment is available and system crashes. Set - * crash_kexec_post_notifiers to be true to make sure that - * calling crash enlightment interface before running kdump - * kernel. - */ - if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) - crash_kexec_post_notifiers = true; - #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index cac9c6bfc653..4d7fcff39b8c 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -13,9 +13,11 @@ */ #include +#include #include #include #include +#include #include #include #include @@ -70,6 +72,16 @@ int __init hv_common_init(void) { int i; + /* + * Hyper-V expects to get crash register data or kmsg when + * crash enlightment is available and system crashes. Set + * crash_kexec_post_notifiers to be true to make sure that + * calling crash enlightment interface before running kdump + * kernel. + */ + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) + crash_kexec_post_notifiers = true; + /* * Allocate the per-CPU state for the hypercall input arg. * If this allocation fails, we will not be able to setup @@ -204,6 +216,12 @@ bool hv_query_ext_cap(u64 cap_query) } EXPORT_SYMBOL_GPL(hv_query_ext_cap); +bool hv_is_hibernation_supported(void) +{ + return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); +} +EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); + /* These __weak functions provide default "no-op" behavior and * may be overridden by architecture specific versions. Architectures * for which the default "no-op" behavior is sufficient can leave -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 13 Jul 2021 17:01:46 -0700 Subject: [PATCH 52/53] drivers: hv: Decouple Hyper-V clock/timer code from VMbus drivers Hyper-V clock/timer code in hyperv_timer.c is mostly independent from other VMbus drivers, but building for ARM64 without hyperv_timer.c shows some remaining entanglements. A default implementation of hv_read_reference_counter can just read a Hyper-V synthetic register and be independent of hyperv_timer.c, so move this code out and into hv_common.c. Then it can be used by the timesync driver even if hyperv_timer.c isn't built on a particular architecture. If hyperv_timer.c *is* built, it can override with a faster implementation. Also provide stubs for stimer functions called by the VMbus driver when hyperv_timer.c isn't built. No functional changes. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1626220906-22629-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu (cherry picked from commit 31e5e64694cf9879e63b2802007fa934f4131126) --- drivers/clocksource/hyperv_timer.c | 3 --- drivers/hv/hv_common.c | 14 ++++++++++++++ drivers/hv/hv_util.c | 5 ----- include/asm-generic/mshyperv.h | 2 ++ include/clocksource/hyperv_timer.h | 11 +++++++++-- 5 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 150a3f7174a9..6b59c0492cdd 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -361,9 +361,6 @@ EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); * Hyper-V and 32-bit x86. The TSC reference page version is preferred. */ -u64 (*hv_read_reference_counter)(void); -EXPORT_SYMBOL_GPL(hv_read_reference_counter); - static union { struct ms_hyperv_tsc_page page; u8 reserved[PAGE_SIZE]; diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 4d7fcff39b8c..e589ca2d3f38 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -222,6 +222,20 @@ bool hv_is_hibernation_supported(void) } EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); +/* + * Default function to read the Hyper-V reference counter, independent + * of whether Hyper-V enlightened clocks/timers are being used. But on + * architectures where it is used, Hyper-V enlightenment code in + * hyperv_timer.c may override this function. + */ +static u64 __hv_read_ref_counter(void) +{ + return hv_get_register(HV_REGISTER_TIME_REF_COUNT); +} + +u64 (*hv_read_reference_counter)(void) = __hv_read_ref_counter; +EXPORT_SYMBOL_GPL(hv_read_reference_counter); + /* These __weak functions provide default "no-op" behavior and * may be overridden by architecture specific versions. Architectures * for which the default "no-op" behavior is sufficient can leave diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 1b914e418e41..83d6a1d80172 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include "hyperv_vmbus.h" @@ -681,10 +680,6 @@ static struct ptp_clock *hv_ptp_clock; static int hv_timesync_init(struct hv_util_service *srv) { - /* TimeSync requires Hyper-V clocksource. */ - if (!hv_read_reference_counter) - return -ENODEV; - spin_lock_init(&host_ts.lock); INIT_WORK(&adj_time_work, hv_set_host_time); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 977166ff8672..ea87171e5b25 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -165,6 +165,8 @@ extern bool hv_root_partition; extern u32 *hv_vp_index; extern u32 hv_max_vp_index; +extern u64 (*hv_read_reference_counter)(void); + /* Sentinel value for an uninitialized entry in hv_vp_index array */ #define VP_INVAL U32_MAX diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h index b6774aa5a4b8..b3f5d73ae1d6 100644 --- a/include/clocksource/hyperv_timer.h +++ b/include/clocksource/hyperv_timer.h @@ -20,6 +20,8 @@ #define HV_MAX_MAX_DELTA_TICKS 0xffffffff #define HV_MIN_DELTA_TICKS 1 +#ifdef CONFIG_HYPERV_TIMER + /* Routines called by the VMbus driver */ extern int hv_stimer_alloc(bool have_percpu_irqs); extern int hv_stimer_cleanup(unsigned int cpu); @@ -28,8 +30,6 @@ extern void hv_stimer_legacy_cleanup(unsigned int cpu); extern void hv_stimer_global_cleanup(void); extern void hv_stimer0_isr(void); -#ifdef CONFIG_HYPERV_TIMER -extern u64 (*hv_read_reference_counter)(void); extern void hv_init_clocksource(void); extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void); @@ -100,6 +100,13 @@ static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, { return U64_MAX; } + +static inline int hv_stimer_cleanup(unsigned int cpu) { return 0; } +static inline void hv_stimer_legacy_init(unsigned int cpu, int sint) {} +static inline void hv_stimer_legacy_cleanup(unsigned int cpu) {} +static inline void hv_stimer_global_cleanup(void) {} +static inline void hv_stimer0_isr(void) {} + #endif /* CONFIG_HYPERV_TIMER */ #endif -- 2.18.4 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Pablo Greco Date: Sat, 23 Oct 2021 15:42:26 +0000 Subject: [PATCH 53/53] Avoid backporting f39650de687e35766572ac89dbcd16a5911e2f0a --- drivers/hv/hv_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index e589ca2d3f38..3dc8e120b48d 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include -- 2.18.4