From 2179f3814fab3c6d3748a0342cc6e771d31ceb52 Mon Sep 17 00:00:00 2001 From: Pablo Greco Date: Nov 02 2021 12:07:26 +0000 Subject: Add backported patches for azure/aarch64 --- diff --git a/SOURCES/azure.patch b/SOURCES/azure.patch new file mode 100644 index 0000000..9dd1248 --- /dev/null +++ b/SOURCES/azure.patch @@ -0,0 +1,7859 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: "Andrea Parri (Microsoft)" +Date: Mon, 1 Feb 2021 15:48:11 +0100 +Subject: [PATCH 01/53] x86/hyperv: Load/save the Isolation Configuration leaf + +If bit 22 of Group B Features is set, the guest has access to the +Isolation Configuration CPUID leaf. On x86, the first four bits +of EAX in this leaf provide the isolation type of the partition; +we entail three isolation types: 'SNP' (hardware-based isolation), +'VBS' (software-based isolation), and 'NONE' (no isolation). + +Signed-off-by: Andrea Parri (Microsoft) +Cc: Thomas Gleixner +Cc: Ingo Molnar +Cc: Borislav Petkov +Cc: "H. Peter Anvin" +Cc: Arnd Bergmann +Cc: x86@kernel.org +Cc: linux-arch@vger.kernel.org +Link: https://lore.kernel.org/r/20210201144814.2701-2-parri.andrea@gmail.com +Reviewed-by: Michael Kelley +Signed-off-by: Wei Liu +(cherry picked from commit a6c76bb08dc7f7ff2b1c381002eb6c7211746182) +--- + arch/x86/hyperv/hv_init.c | 15 +++++++++++++++ + arch/x86/include/asm/hyperv-tlfs.h | 15 +++++++++++++++ + arch/x86/kernel/cpu/mshyperv.c | 9 +++++++++ + include/asm-generic/hyperv-tlfs.h | 1 + + include/asm-generic/mshyperv.h | 5 +++++ + 5 files changed, 45 insertions(+) + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index 6375967a8244..6608d50d7aaa 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -555,3 +556,17 @@ bool hv_is_hibernation_supported(void) + return acpi_sleep_state_supported(ACPI_STATE_S4); + } + 
EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); ++ ++enum hv_isolation_type hv_get_isolation_type(void) ++{ ++ if (!(ms_hyperv.features_b & HV_ISOLATION)) ++ return HV_ISOLATION_TYPE_NONE; ++ return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); ++} ++EXPORT_SYMBOL_GPL(hv_get_isolation_type); ++ ++bool hv_is_isolation_supported(void) ++{ ++ return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; ++} ++EXPORT_SYMBOL_GPL(hv_is_isolation_supported); +diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h +index 0ed20e8bba9e..cc878049c39b 100644 +--- a/arch/x86/include/asm/hyperv-tlfs.h ++++ b/arch/x86/include/asm/hyperv-tlfs.h +@@ -22,6 +22,7 @@ + #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 + #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 + #define HYPERV_CPUID_NESTED_FEATURES 0x4000000A ++#define HYPERV_CPUID_ISOLATION_CONFIG 0x4000000C + + #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 + #define HYPERV_CPUID_MIN 0x40000005 +@@ -115,6 +116,20 @@ + #define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) + #define HV_X64_NESTED_MSR_BITMAP BIT(19) + ++/* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */ ++#define HV_PARAVISOR_PRESENT BIT(0) ++ ++/* HYPERV_CPUID_ISOLATION_CONFIG.EBX bits. */ ++#define HV_ISOLATION_TYPE GENMASK(3, 0) ++#define HV_SHARED_GPA_BOUNDARY_ACTIVE BIT(5) ++#define HV_SHARED_GPA_BOUNDARY_BITS GENMASK(11, 6) ++ ++enum hv_isolation_type { ++ HV_ISOLATION_TYPE_NONE = 0, ++ HV_ISOLATION_TYPE_VBS = 1, ++ HV_ISOLATION_TYPE_SNP = 2 ++}; ++ + /* Hyper-V specific model specific registers (MSRs) */ + + /* MSR used to identify the guest OS. 
*/ +diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c +index 65d11711cd7b..47a95d543658 100644 +--- a/arch/x86/kernel/cpu/mshyperv.c ++++ b/arch/x86/kernel/cpu/mshyperv.c +@@ -243,6 +243,7 @@ static void __init ms_hyperv_init_platform(void) + * Extract the features and hints + */ + ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); ++ ms_hyperv.features_b = cpuid_ebx(HYPERV_CPUID_FEATURES); + ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); + ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); + +@@ -277,6 +278,14 @@ static void __init ms_hyperv_init_platform(void) + x86_platform.calibrate_cpu = hv_get_tsc_khz; + } + ++ if (ms_hyperv.features_b & HV_ISOLATION) { ++ ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG); ++ ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG); ++ ++ pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ++ ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); ++ } ++ + if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) { + ms_hyperv.nested_features = + cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); +diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h +index e73a11850055..20d3cd950204 100644 +--- a/include/asm-generic/hyperv-tlfs.h ++++ b/include/asm-generic/hyperv-tlfs.h +@@ -89,6 +89,7 @@ + #define HV_ACCESS_STATS BIT(8) + #define HV_DEBUGGING BIT(11) + #define HV_CPU_POWER_MANAGEMENT BIT(12) ++#define HV_ISOLATION BIT(22) + + + /* +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +index c57799684170..dff58a3db5d5 100644 +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -27,11 +27,14 @@ + + struct ms_hyperv_info { + u32 features; ++ u32 features_b; + u32 misc_features; + u32 hints; + u32 nested_features; + u32 max_vp_index; + u32 max_lp_index; ++ u32 isolation_config_a; ++ u32 isolation_config_b; + }; + extern struct ms_hyperv_info 
ms_hyperv; + +@@ -169,6 +172,8 @@ void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die); + void hyperv_report_panic_msg(phys_addr_t pa, size_t size); + bool hv_is_hyperv_initialized(void); + bool hv_is_hibernation_supported(void); ++enum hv_isolation_type hv_get_isolation_type(void); ++bool hv_is_isolation_supported(void); + void hyperv_cleanup(void); + #else /* CONFIG_HYPERV */ + static inline bool hv_is_hyperv_initialized(void) { return false; } +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:26 +0000 +Subject: [PATCH 02/53] x86/hyperv: handling hypercall page setup for root + +When Linux is running as the root partition, the hypercall page will +have already been setup by Hyper-V. Copy the content over to the +allocated page. + +Add checks to hv_suspend & co to bail early because they are not +supported in this setup yet. + +Signed-off-by: Lillian Grassin-Drake +Signed-off-by: Sunil Muthuswamy +Signed-off-by: Nuno Das Neves +Co-Developed-by: Lillian Grassin-Drake +Co-Developed-by: Sunil Muthuswamy +Co-Developed-by: Nuno Das Neves +Signed-off-by: Wei Liu +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-8-wei.liu@kernel.org +(cherry picked from commit 80f73c9f7468b15484e3ee4a29870fc9fa0419cc) +--- + arch/x86/hyperv/hv_init.c | 37 ++++++++++++++++++++++++++++++++++--- + 1 file changed, 34 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index 6608d50d7aaa..e85d3199a3da 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + + int hyperv_init_cpuhp; + +@@ -265,6 +266,9 @@ static int hv_suspend(void) + union hv_x64_msr_hypercall_contents hypercall_msr; + int ret; + ++ if (hv_root_partition) ++ return -EPERM; ++ + /* + * Reset the hypercall page as it is going to be invalidated + * accross hibernation. 
Setting hv_hypercall_pg to NULL ensures +@@ -409,8 +413,35 @@ void __init hyperv_init(void) + + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + hypercall_msr.enable = 1; +- hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); +- wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); ++ ++ if (hv_root_partition) { ++ struct page *pg; ++ void *src, *dst; ++ ++ /* ++ * For the root partition, the hypervisor will set up its ++ * hypercall page. The hypervisor guarantees it will not show ++ * up in the root's address space. The root can't change the ++ * location of the hypercall page. ++ * ++ * Order is important here. We must enable the hypercall page ++ * so it is populated with code, then copy the code to an ++ * executable page. ++ */ ++ wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); ++ ++ pg = vmalloc_to_page(hv_hypercall_pg); ++ dst = kmap(pg); ++ src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE, ++ MEMREMAP_WB); ++ BUG_ON(!(src && dst)); ++ memcpy(dst, src, HV_HYP_PAGE_SIZE); ++ memunmap(src); ++ kunmap(pg); ++ } else { ++ hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); ++ wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); ++ } + + /* + * hyperv_init() is called before LAPIC is initialized: see +@@ -553,7 +584,7 @@ EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); + + bool hv_is_hibernation_supported(void) + { +- return acpi_sleep_state_supported(ACPI_STATE_S4); ++ return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); + } + EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); + +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:23 +0000 +Subject: [PATCH 03/53] clocksource/hyperv: use MSR-based access if running as + root + +When Linux runs as the root partition, the setup required for TSC page +is different. Luckily Linux also has access to the MSR based +clocksource. 
We can just disable the TSC page clocksource if Linux is +the root partition. + +Signed-off-by: Wei Liu +Acked-by: Daniel Lezcano +Reviewed-by: Pavel Tatashin +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-5-wei.liu@kernel.org +(cherry picked from commit 7d4163c8315729140ad99d6e1ab10dfc7a685640) +--- + drivers/clocksource/hyperv_timer.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c +index ba04cb381cd3..269a691bd2c4 100644 +--- a/drivers/clocksource/hyperv_timer.c ++++ b/drivers/clocksource/hyperv_timer.c +@@ -426,6 +426,9 @@ static bool __init hv_init_tsc_clocksource(void) + if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) + return false; + ++ if (hv_root_partition) ++ return false; ++ + hv_read_reference_counter = read_hv_clock_tsc; + phys_addr = virt_to_phys(hv_get_tsc_page()); + +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:24 +0000 +Subject: [PATCH 04/53] x86/hyperv: allocate output arg pages if required + +When Linux runs as the root partition, it will need to make hypercalls +which return data from the hypervisor. + +Allocate pages for storing results when Linux runs as the root +partition. 
+ +Signed-off-by: Lillian Grassin-Drake +Co-Developed-by: Lillian Grassin-Drake +Signed-off-by: Wei Liu +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-6-wei.liu@kernel.org +(cherry picked from commit 5d0f077e0f413b7eca827b16ea8bfc4569e3946c) +--- + arch/x86/hyperv/hv_init.c | 35 ++++++++++++++++++++++++++++----- + arch/x86/include/asm/mshyperv.h | 1 + + 2 files changed, 31 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index e85d3199a3da..6c576c256e15 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -46,6 +46,9 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page); + void __percpu **hyperv_pcpu_input_arg; + EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); + ++void __percpu **hyperv_pcpu_output_arg; ++EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); ++ + u32 hv_max_vp_index; + EXPORT_SYMBOL_GPL(hv_max_vp_index); + +@@ -78,12 +81,19 @@ static int hv_cpu_init(unsigned int cpu) + void **input_arg; + struct page *pg; + +- input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ +- pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL); ++ pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ? 
1 : 0); + if (unlikely(!pg)) + return -ENOMEM; ++ ++ input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + *input_arg = page_address(pg); ++ if (hv_root_partition) { ++ void **output_arg; ++ ++ output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); ++ *output_arg = page_address(pg + 1); ++ } + + hv_get_vp_index(msr_vp_index); + +@@ -210,14 +220,23 @@ static int hv_cpu_die(unsigned int cpu) + unsigned int new_cpu; + unsigned long flags; + void **input_arg; +- void *input_pg = NULL; ++ void *pg; + + local_irq_save(flags); + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); +- input_pg = *input_arg; ++ pg = *input_arg; + *input_arg = NULL; ++ ++ if (hv_root_partition) { ++ void **output_arg; ++ ++ output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); ++ *output_arg = NULL; ++ } ++ + local_irq_restore(flags); +- free_page((unsigned long)input_pg); ++ ++ free_pages((unsigned long)pg, hv_root_partition ? 1 : 0); + + if (hv_vp_assist_page && hv_vp_assist_page[cpu]) + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); +@@ -373,6 +392,12 @@ void __init hyperv_init(void) + + BUG_ON(hyperv_pcpu_input_arg == NULL); + ++ /* Allocate the per-CPU state for output arg for root */ ++ if (hv_root_partition) { ++ hyperv_pcpu_output_arg = alloc_percpu(void *); ++ BUG_ON(hyperv_pcpu_output_arg == NULL); ++ } ++ + /* Allocate percpu VP index */ + hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), + GFP_KERNEL); +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index 30f76b966857..cf881d0c7c9d 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -78,6 +78,7 @@ extern int hyperv_init_cpuhp; + + extern void *hv_hypercall_pg; + extern void __percpu **hyperv_pcpu_input_arg; ++extern void __percpu **hyperv_pcpu_output_arg; + + static inline u64 hv_do_hypercall(u64 control, void *input, void *output) + { +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 
+From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:21 +0000 +Subject: [PATCH 05/53] x86/hyperv: detect if Linux is the root partition + +For now we can use the privilege flag to check. Stash the value to be +used later. + +Put in a bunch of defines for future use when we want to have more +fine-grained detection. + +Signed-off-by: Wei Liu +Reviewed-by: Pavel Tatashin +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-3-wei.liu@kernel.org +(cherry picked from commit e997720202b363ba8000d769f114e3c2c5822227) +--- + arch/x86/include/asm/hyperv-tlfs.h | 10 ++++++++++ + arch/x86/include/asm/mshyperv.h | 2 ++ + arch/x86/kernel/cpu/mshyperv.c | 20 ++++++++++++++++++++ + 3 files changed, 32 insertions(+) + +diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h +index cc878049c39b..9cf3118a80d4 100644 +--- a/arch/x86/include/asm/hyperv-tlfs.h ++++ b/arch/x86/include/asm/hyperv-tlfs.h +@@ -21,6 +21,7 @@ + #define HYPERV_CPUID_FEATURES 0x40000003 + #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 + #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 ++#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007 + #define HYPERV_CPUID_NESTED_FEATURES 0x4000000A + #define HYPERV_CPUID_ISOLATION_CONFIG 0x4000000C + +@@ -104,6 +105,15 @@ + /* Recommend using enlightened VMCS */ + #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) + ++/* ++ * CPU management features identification. ++ * These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits. 
++ */ ++#define HV_X64_START_LOGICAL_PROCESSOR BIT(0) ++#define HV_X64_CREATE_ROOT_VIRTUAL_PROCESSOR BIT(1) ++#define HV_X64_PERFORMANCE_COUNTER_SYNC BIT(2) ++#define HV_X64_RESERVED_IDENTITY_BIT BIT(31) ++ + /* + * Virtual processor will never share a physical core with another virtual + * processor, except for virtual processors that are reported as sibling SMT +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index cf881d0c7c9d..ef06cdac8444 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -240,6 +240,8 @@ int hyperv_fill_flush_guest_mapping_list( + struct hv_guest_mapping_flush_list *flush, + u64 start_gfn, u64 end_gfn); + ++extern bool hv_root_partition; ++ + #ifdef CONFIG_X86_64 + void hv_apic_init(void); + void __init hv_init_spinlocks(void); +diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c +index 47a95d543658..e8fc8d297b6b 100644 +--- a/arch/x86/kernel/cpu/mshyperv.c ++++ b/arch/x86/kernel/cpu/mshyperv.c +@@ -32,6 +32,10 @@ + #include + #include + ++/* Is Linux running as the root partition? */ ++bool hv_root_partition; ++EXPORT_SYMBOL_GPL(hv_root_partition); ++ + struct ms_hyperv_info ms_hyperv; + EXPORT_SYMBOL_GPL(ms_hyperv); + +@@ -256,6 +260,22 @@ static void __init ms_hyperv_init_platform(void) + pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n", + ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); + ++ /* ++ * Check CPU management privilege. ++ * ++ * To mirror what Windows does we should extract CPU management ++ * features and use the ReservedIdentityBit to detect if Linux is the ++ * root partition. But that requires negotiating CPU management ++ * interface (a process to be finalized). ++ * ++ * For now, use the privilege flag as the indicator for running as ++ * root. 
++ */ ++ if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) { ++ hv_root_partition = true; ++ pr_info("Hyper-V: running as root partition\n"); ++ } ++ + /* + * Extract host information. + */ +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:29 +0000 +Subject: [PATCH 06/53] x86/hyperv: implement and use hv_smp_prepare_cpus + +Microsoft Hypervisor requires the root partition to make a few +hypercalls to setup application processors before they can be used. + +Signed-off-by: Lillian Grassin-Drake +Signed-off-by: Sunil Muthuswamy +Co-Developed-by: Lillian Grassin-Drake +Co-Developed-by: Sunil Muthuswamy +Signed-off-by: Wei Liu +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-11-wei.liu@kernel.org +(cherry picked from commit 333abaf5abb396820c4c7c26a8eecc7523c99184) +--- + arch/x86/kernel/cpu/mshyperv.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c +index e8fc8d297b6b..9fde437f53ea 100644 +--- a/arch/x86/kernel/cpu/mshyperv.c ++++ b/arch/x86/kernel/cpu/mshyperv.c +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + + /* Is Linux running as the root partition? 
*/ + bool hv_root_partition; +@@ -230,6 +231,32 @@ static void __init hv_smp_prepare_boot_cpu(void) + hv_init_spinlocks(); + #endif + } ++ ++static void __init hv_smp_prepare_cpus(unsigned int max_cpus) ++{ ++#ifdef CONFIG_X86_64 ++ int i; ++ int ret; ++#endif ++ ++ native_smp_prepare_cpus(max_cpus); ++ ++#ifdef CONFIG_X86_64 ++ for_each_present_cpu(i) { ++ if (i == 0) ++ continue; ++ ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i)); ++ BUG_ON(ret); ++ } ++ ++ for_each_present_cpu(i) { ++ if (i == 0) ++ continue; ++ ret = hv_call_create_vp(numa_cpu_node(i), hv_current_partition_id, i, i); ++ BUG_ON(ret); ++ } ++#endif ++} + #endif + + static void __init ms_hyperv_init_platform(void) +@@ -393,6 +420,8 @@ static void __init ms_hyperv_init_platform(void) + + # ifdef CONFIG_SMP + smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; ++ if (hv_root_partition) ++ smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus; + # endif + + /* +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:20 +0000 +Subject: [PATCH 07/53] asm-generic/hyperv: change HV_CPU_POWER_MANAGEMENT to + HV_CPU_MANAGEMENT + +This makes the name match Hyper-V TLFS. 
+ +Signed-off-by: Wei Liu +Reviewed-by: Vitaly Kuznetsov +Reviewed-by: Pavel Tatashin +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-2-wei.liu@kernel.org +(cherry picked from commit 8f1d14cb835672cd27f6533f22f4c73e60a30727) +--- + include/asm-generic/hyperv-tlfs.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h +index 20d3cd950204..e232ddcb0a2d 100644 +--- a/include/asm-generic/hyperv-tlfs.h ++++ b/include/asm-generic/hyperv-tlfs.h +@@ -88,7 +88,7 @@ + #define HV_CONNECT_PORT BIT(7) + #define HV_ACCESS_STATS BIT(8) + #define HV_DEBUGGING BIT(11) +-#define HV_CPU_POWER_MANAGEMENT BIT(12) ++#define HV_CPU_MANAGEMENT BIT(12) + #define HV_ISOLATION BIT(22) + + +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:32 +0000 +Subject: [PATCH 08/53] asm-generic/hyperv: introduce hv_device_id and + auxiliary structures + +We will need to identify the device we want Microsoft Hypervisor to +manipulate. Introduce the data structures for that purpose. + +They will be used in a later patch. 
+ +Signed-off-by: Sunil Muthuswamy +Co-Developed-by: Sunil Muthuswamy +Signed-off-by: Wei Liu +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-14-wei.liu@kernel.org +(cherry picked from commit 12434e5fb6aed4655340ce74cd2a0dd859dff5bd) +--- + include/asm-generic/hyperv-tlfs.h | 79 +++++++++++++++++++++++++++++++ + 1 file changed, 79 insertions(+) + +diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h +index e232ddcb0a2d..ccc81c277d09 100644 +--- a/include/asm-generic/hyperv-tlfs.h ++++ b/include/asm-generic/hyperv-tlfs.h +@@ -495,4 +495,83 @@ struct hv_set_vp_registers_input { + } element[]; + } __packed; + ++enum hv_device_type { ++ HV_DEVICE_TYPE_LOGICAL = 0, ++ HV_DEVICE_TYPE_PCI = 1, ++ HV_DEVICE_TYPE_IOAPIC = 2, ++ HV_DEVICE_TYPE_ACPI = 3, ++}; ++ ++typedef u16 hv_pci_rid; ++typedef u16 hv_pci_segment; ++typedef u64 hv_logical_device_id; ++union hv_pci_bdf { ++ u16 as_uint16; ++ ++ struct { ++ u8 function:3; ++ u8 device:5; ++ u8 bus; ++ }; ++} __packed; ++ ++union hv_pci_bus_range { ++ u16 as_uint16; ++ ++ struct { ++ u8 subordinate_bus; ++ u8 secondary_bus; ++ }; ++} __packed; ++ ++union hv_device_id { ++ u64 as_uint64; ++ ++ struct { ++ u64 reserved0:62; ++ u64 device_type:2; ++ }; ++ ++ /* HV_DEVICE_TYPE_LOGICAL */ ++ struct { ++ u64 id:62; ++ u64 device_type:2; ++ } logical; ++ ++ /* HV_DEVICE_TYPE_PCI */ ++ struct { ++ union { ++ hv_pci_rid rid; ++ union hv_pci_bdf bdf; ++ }; ++ ++ hv_pci_segment segment; ++ union hv_pci_bus_range shadow_bus_range; ++ ++ u16 phantom_function_bits:2; ++ u16 source_shadow:1; ++ ++ u16 rsvdz0:11; ++ u16 device_type:2; ++ } pci; ++ ++ /* HV_DEVICE_TYPE_IOAPIC */ ++ struct { ++ u8 ioapic_id; ++ u8 rsvdz0; ++ u16 rsvdz1; ++ u16 rsvdz2; ++ ++ u16 rsvdz3:14; ++ u16 device_type:2; ++ } ioapic; ++ ++ /* HV_DEVICE_TYPE_ACPI */ ++ struct { ++ u32 input_mapping_base; ++ u32 input_mapping_count:30; ++ u32 device_type:2; ++ } acpi; ++} __packed; ++ + #endif +-- +2.18.4 + + 
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:25 +0000 +Subject: [PATCH 09/53] x86/hyperv: extract partition ID from Microsoft + Hypervisor if necessary + +We will need the partition ID for executing some hypercalls later. + +Signed-off-by: Lillian Grassin-Drake +Co-Developed-by: Sunil Muthuswamy +Signed-off-by: Wei Liu +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-7-wei.liu@kernel.org +(cherry picked from commit 99a0f46af6a7715147e81c558d558021aad4e207) +--- + arch/x86/hyperv/hv_init.c | 26 ++++++++++++++++++++++++++ + arch/x86/include/asm/mshyperv.h | 2 ++ + include/asm-generic/hyperv-tlfs.h | 6 ++++++ + 3 files changed, 34 insertions(+) + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index 6c576c256e15..aeea8fbf3c23 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -30,6 +30,8 @@ + #include + + int hyperv_init_cpuhp; ++u64 hv_current_partition_id = ~0ull; ++EXPORT_SYMBOL_GPL(hv_current_partition_id); + + void *hv_hypercall_pg; + EXPORT_SYMBOL_GPL(hv_hypercall_pg); +@@ -358,6 +360,24 @@ static void __init hv_stimer_setup_percpu_clockev(void) + old_setup_percpu_clockev(); + } + ++static void __init hv_get_partition_id(void) ++{ ++ struct hv_get_partition_id *output_page; ++ u64 status; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ output_page = *this_cpu_ptr(hyperv_pcpu_output_arg); ++ status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page); ++ if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { ++ /* No point in proceeding if this failed */ ++ pr_err("Failed to get partition ID: %lld\n", status); ++ BUG(); ++ } ++ hv_current_partition_id = output_page->partition_id; ++ local_irq_restore(flags); ++} ++ + /* + * This function is to be invoked early in the boot sequence after the + * hypervisor has been detected. 
+@@ -485,6 +505,12 @@ void __init hyperv_init(void) + register_syscore_ops(&hv_syscore_ops); + + hyperv_init_cpuhp = cpuhp; ++ ++ if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID) ++ hv_get_partition_id(); ++ ++ BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull); ++ + return; + + remove_cpuhp_state: +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index ef06cdac8444..b8324202d850 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -80,6 +80,8 @@ extern void *hv_hypercall_pg; + extern void __percpu **hyperv_pcpu_input_arg; + extern void __percpu **hyperv_pcpu_output_arg; + ++extern u64 hv_current_partition_id; ++ + static inline u64 hv_do_hypercall(u64 control, void *input, void *output) + { + u64 input_address = input ? virt_to_phys(input) : 0; +diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h +index ccc81c277d09..cbc13b7c7022 100644 +--- a/include/asm-generic/hyperv-tlfs.h ++++ b/include/asm-generic/hyperv-tlfs.h +@@ -142,6 +142,7 @@ struct ms_hyperv_tsc_page { + #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 + #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 + #define HVCALL_SEND_IPI_EX 0x0015 ++#define HVCALL_GET_PARTITION_ID 0x0046 + #define HVCALL_GET_VP_REGISTERS 0x0050 + #define HVCALL_SET_VP_REGISTERS 0x0051 + #define HVCALL_POST_MESSAGE 0x005c +@@ -408,6 +409,11 @@ struct hv_tlb_flush_ex { + u64 gva_list[]; + } __packed; + ++/* HvGetPartitionId hypercall (output only) */ ++struct hv_get_partition_id { ++ u64 partition_id; ++} __packed; ++ + /* HvRetargetDeviceInterrupt hypercall */ + union hv_msi_entry { + u64 as_uint64; +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:28 +0000 +Subject: [PATCH 10/53] x86/hyperv: provide a bunch of helper functions + +They are used to deposit pages into Microsoft Hypervisor and bring up +logical and virtual 
processors. + +Signed-off-by: Lillian Grassin-Drake +Signed-off-by: Sunil Muthuswamy +Signed-off-by: Nuno Das Neves +Co-Developed-by: Lillian Grassin-Drake +Co-Developed-by: Sunil Muthuswamy +Co-Developed-by: Nuno Das Neves +Signed-off-by: Wei Liu +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-10-wei.liu@kernel.org +(cherry picked from commit 86b5ec3552f3c09694e6f7934834b0a2a3aeebbe) +--- + arch/x86/hyperv/Makefile | 2 +- + arch/x86/hyperv/hv_proc.c | 219 ++++++++++++++++++++++++++++++ + arch/x86/include/asm/mshyperv.h | 4 + + include/asm-generic/hyperv-tlfs.h | 67 +++++++++ + 4 files changed, 291 insertions(+), 1 deletion(-) + create mode 100644 arch/x86/hyperv/hv_proc.c + +diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile +index 89b1f74d3225..565358020921 100644 +--- a/arch/x86/hyperv/Makefile ++++ b/arch/x86/hyperv/Makefile +@@ -1,6 +1,6 @@ + # SPDX-License-Identifier: GPL-2.0-only + obj-y := hv_init.o mmu.o nested.o +-obj-$(CONFIG_X86_64) += hv_apic.o ++obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o + + ifdef CONFIG_X86_64 + obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o +diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c +new file mode 100644 +index 000000000000..60461e598239 +--- /dev/null ++++ b/arch/x86/hyperv/hv_proc.c +@@ -0,0 +1,219 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* ++ * See struct hv_deposit_memory. The first u64 is partition ID, the rest ++ * are GPAs. ++ */ ++#define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1) ++ ++/* Deposits exact number of pages. Must be called with interrupts enabled. 
*/ ++int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) ++{ ++ struct page **pages, *page; ++ int *counts; ++ int num_allocations; ++ int i, j, page_count; ++ int order; ++ u64 status; ++ int ret; ++ u64 base_pfn; ++ struct hv_deposit_memory *input_page; ++ unsigned long flags; ++ ++ if (num_pages > HV_DEPOSIT_MAX) ++ return -E2BIG; ++ if (!num_pages) ++ return 0; ++ ++ /* One buffer for page pointers and counts */ ++ page = alloc_page(GFP_KERNEL); ++ if (!page) ++ return -ENOMEM; ++ pages = page_address(page); ++ ++ counts = kcalloc(HV_DEPOSIT_MAX, sizeof(int), GFP_KERNEL); ++ if (!counts) { ++ free_page((unsigned long)pages); ++ return -ENOMEM; ++ } ++ ++ /* Allocate all the pages before disabling interrupts */ ++ i = 0; ++ ++ while (num_pages) { ++ /* Find highest order we can actually allocate */ ++ order = 31 - __builtin_clz(num_pages); ++ ++ while (1) { ++ pages[i] = alloc_pages_node(node, GFP_KERNEL, order); ++ if (pages[i]) ++ break; ++ if (!order) { ++ ret = -ENOMEM; ++ num_allocations = i; ++ goto err_free_allocations; ++ } ++ --order; ++ } ++ ++ split_page(pages[i], order); ++ counts[i] = 1 << order; ++ num_pages -= counts[i]; ++ i++; ++ } ++ num_allocations = i; ++ ++ local_irq_save(flags); ++ ++ input_page = *this_cpu_ptr(hyperv_pcpu_input_arg); ++ ++ input_page->partition_id = partition_id; ++ ++ /* Populate gpa_page_list - these will fit on the input page */ ++ for (i = 0, page_count = 0; i < num_allocations; ++i) { ++ base_pfn = page_to_pfn(pages[i]); ++ for (j = 0; j < counts[i]; ++j, ++page_count) ++ input_page->gpa_page_list[page_count] = base_pfn + j; ++ } ++ status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY, ++ page_count, 0, input_page, NULL); ++ local_irq_restore(flags); ++ ++ if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { ++ pr_err("Failed to deposit pages: %lld\n", status); ++ ret = status; ++ goto err_free_allocations; ++ } ++ ++ ret = 0; ++ goto free_buf; ++ ++err_free_allocations: ++ for (i = 0; i < 
num_allocations; ++i) { ++ base_pfn = page_to_pfn(pages[i]); ++ for (j = 0; j < counts[i]; ++j) ++ __free_page(pfn_to_page(base_pfn + j)); ++ } ++ ++free_buf: ++ free_page((unsigned long)pages); ++ kfree(counts); ++ return ret; ++} ++ ++int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) ++{ ++ struct hv_add_logical_processor_in *input; ++ struct hv_add_logical_processor_out *output; ++ u64 status; ++ unsigned long flags; ++ int ret = 0; ++ int pxm = node_to_pxm(node); ++ ++ /* ++ * When adding a logical processor, the hypervisor may return ++ * HV_STATUS_INSUFFICIENT_MEMORY. When that happens, we deposit more ++ * pages and retry. ++ */ ++ do { ++ local_irq_save(flags); ++ ++ input = *this_cpu_ptr(hyperv_pcpu_input_arg); ++ /* We don't do anything with the output right now */ ++ output = *this_cpu_ptr(hyperv_pcpu_output_arg); ++ ++ input->lp_index = lp_index; ++ input->apic_id = apic_id; ++ input->flags = 0; ++ input->proximity_domain_info.domain_id = pxm; ++ input->proximity_domain_info.flags.reserved = 0; ++ input->proximity_domain_info.flags.proximity_info_valid = 1; ++ input->proximity_domain_info.flags.proximity_preferred = 1; ++ status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR, ++ input, output); ++ local_irq_restore(flags); ++ ++ status &= HV_HYPERCALL_RESULT_MASK; ++ ++ if (status != HV_STATUS_INSUFFICIENT_MEMORY) { ++ if (status != HV_STATUS_SUCCESS) { ++ pr_err("%s: cpu %u apic ID %u, %lld\n", __func__, ++ lp_index, apic_id, status); ++ ret = status; ++ } ++ break; ++ } ++ ret = hv_call_deposit_pages(node, hv_current_partition_id, 1); ++ } while (!ret); ++ ++ return ret; ++} ++ ++int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) ++{ ++ struct hv_create_vp *input; ++ u64 status; ++ unsigned long irq_flags; ++ int ret = 0; ++ int pxm = node_to_pxm(node); ++ ++ /* Root VPs don't seem to need pages deposited */ ++ if (partition_id != hv_current_partition_id) { ++ /* The value 90 is empirically determined. 
It may change. */ ++ ret = hv_call_deposit_pages(node, partition_id, 90); ++ if (ret) ++ return ret; ++ } ++ ++ do { ++ local_irq_save(irq_flags); ++ ++ input = *this_cpu_ptr(hyperv_pcpu_input_arg); ++ ++ input->partition_id = partition_id; ++ input->vp_index = vp_index; ++ input->flags = flags; ++ input->subnode_type = HvSubnodeAny; ++ if (node != NUMA_NO_NODE) { ++ input->proximity_domain_info.domain_id = pxm; ++ input->proximity_domain_info.flags.reserved = 0; ++ input->proximity_domain_info.flags.proximity_info_valid = 1; ++ input->proximity_domain_info.flags.proximity_preferred = 1; ++ } else { ++ input->proximity_domain_info.as_uint64 = 0; ++ } ++ status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); ++ local_irq_restore(irq_flags); ++ ++ status &= HV_HYPERCALL_RESULT_MASK; ++ ++ if (status != HV_STATUS_INSUFFICIENT_MEMORY) { ++ if (status != HV_STATUS_SUCCESS) { ++ pr_err("%s: vcpu %u, lp %u, %lld\n", __func__, ++ vp_index, flags, status); ++ ret = status; ++ } ++ break; ++ } ++ ret = hv_call_deposit_pages(node, partition_id, 1); ++ ++ } while (!ret); ++ ++ return ret; ++} ++ +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index b8324202d850..f9119781f2bb 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -82,6 +82,10 @@ extern void __percpu **hyperv_pcpu_output_arg; + + extern u64 hv_current_partition_id; + ++int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); ++int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); ++int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); ++ + static inline u64 hv_do_hypercall(u64 control, void *input, void *output) + { + u64 input_address = input ? 
virt_to_phys(input) : 0; +diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h +index cbc13b7c7022..6e6a129516df 100644 +--- a/include/asm-generic/hyperv-tlfs.h ++++ b/include/asm-generic/hyperv-tlfs.h +@@ -143,6 +143,8 @@ struct ms_hyperv_tsc_page { + #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 + #define HVCALL_SEND_IPI_EX 0x0015 + #define HVCALL_GET_PARTITION_ID 0x0046 ++#define HVCALL_DEPOSIT_MEMORY 0x0048 ++#define HVCALL_CREATE_VP 0x004e + #define HVCALL_GET_VP_REGISTERS 0x0050 + #define HVCALL_SET_VP_REGISTERS 0x0051 + #define HVCALL_POST_MESSAGE 0x005c +@@ -150,6 +152,7 @@ struct ms_hyperv_tsc_page { + #define HVCALL_POST_DEBUG_DATA 0x0069 + #define HVCALL_RETRIEVE_DEBUG_DATA 0x006a + #define HVCALL_RESET_DEBUG_SESSION 0x006b ++#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 + #define HVCALL_RETARGET_INTERRUPT 0x007e + #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af + #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 +@@ -414,6 +417,70 @@ struct hv_get_partition_id { + u64 partition_id; + } __packed; + ++/* HvDepositMemory hypercall */ ++struct hv_deposit_memory { ++ u64 partition_id; ++ u64 gpa_page_list[]; ++} __packed; ++ ++struct hv_proximity_domain_flags { ++ u32 proximity_preferred : 1; ++ u32 reserved : 30; ++ u32 proximity_info_valid : 1; ++} __packed; ++ ++/* Not a union in windows but useful for zeroing */ ++union hv_proximity_domain_info { ++ struct { ++ u32 domain_id; ++ struct hv_proximity_domain_flags flags; ++ }; ++ u64 as_uint64; ++} __packed; ++ ++struct hv_lp_startup_status { ++ u64 hv_status; ++ u64 substatus1; ++ u64 substatus2; ++ u64 substatus3; ++ u64 substatus4; ++ u64 substatus5; ++ u64 substatus6; ++} __packed; ++ ++/* HvAddLogicalProcessor hypercall */ ++struct hv_add_logical_processor_in { ++ u32 lp_index; ++ u32 apic_id; ++ union hv_proximity_domain_info proximity_domain_info; ++ u64 flags; ++} __packed; ++ ++struct hv_add_logical_processor_out { ++ struct hv_lp_startup_status 
startup_status; ++} __packed; ++ ++enum HV_SUBNODE_TYPE ++{ ++ HvSubnodeAny = 0, ++ HvSubnodeSocket = 1, ++ HvSubnodeAmdNode = 2, ++ HvSubnodeL3 = 3, ++ HvSubnodeCount = 4, ++ HvSubnodeInvalid = -1 ++}; ++ ++/* HvCreateVp hypercall */ ++struct hv_create_vp { ++ u64 partition_id; ++ u32 vp_index; ++ u8 padding[3]; ++ u8 subnode_type; ++ u64 subnode_id; ++ union hv_proximity_domain_info proximity_domain_info; ++ u64 flags; ++} __packed; ++ + /* HvRetargetDeviceInterrupt hypercall */ + union hv_msi_entry { + u64 as_uint64; +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:33 +0000 +Subject: [PATCH 11/53] asm-generic/hyperv: import data structures for mapping + device interrupts + +Signed-off-by: Sunil Muthuswamy +Co-Developed-by: Sunil Muthuswamy +Signed-off-by: Wei Liu +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-15-wei.liu@kernel.org +(cherry picked from commit 466a9c3f88d04152ca83e840ca940c5f700402ac) +--- + arch/x86/include/asm/hyperv-tlfs.h | 13 +++++++++++ + include/asm-generic/hyperv-tlfs.h | 36 ++++++++++++++++++++++++++++++ + 2 files changed, 49 insertions(+) + +diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h +index 9cf3118a80d4..19ac499ab251 100644 +--- a/arch/x86/include/asm/hyperv-tlfs.h ++++ b/arch/x86/include/asm/hyperv-tlfs.h +@@ -541,6 +541,19 @@ struct hv_partition_assist_pg { + u32 tlb_lock_count; + }; + ++enum hv_interrupt_type { ++ HV_X64_INTERRUPT_TYPE_FIXED = 0x0000, ++ HV_X64_INTERRUPT_TYPE_LOWESTPRIORITY = 0x0001, ++ HV_X64_INTERRUPT_TYPE_SMI = 0x0002, ++ HV_X64_INTERRUPT_TYPE_REMOTEREAD = 0x0003, ++ HV_X64_INTERRUPT_TYPE_NMI = 0x0004, ++ HV_X64_INTERRUPT_TYPE_INIT = 0x0005, ++ HV_X64_INTERRUPT_TYPE_SIPI = 0x0006, ++ HV_X64_INTERRUPT_TYPE_EXTINT = 0x0007, ++ HV_X64_INTERRUPT_TYPE_LOCALINT0 = 0x0008, ++ HV_X64_INTERRUPT_TYPE_LOCALINT1 = 0x0009, ++ HV_X64_INTERRUPT_TYPE_MAXIMUM = 0x000A, ++}; 
+ + #include <asm-generic/hyperv-tlfs.h> + +diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h +index 6e6a129516df..b6ed949be3bf 100644 +--- a/include/asm-generic/hyperv-tlfs.h ++++ b/include/asm-generic/hyperv-tlfs.h +@@ -153,6 +153,8 @@ struct ms_hyperv_tsc_page { + #define HVCALL_RETRIEVE_DEBUG_DATA 0x006a + #define HVCALL_RESET_DEBUG_SESSION 0x006b + #define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 ++#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c ++#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d + #define HVCALL_RETARGET_INTERRUPT 0x007e + #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af + #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 +@@ -647,4 +649,38 @@ union hv_device_id { + } acpi; + } __packed; + ++enum hv_interrupt_trigger_mode { ++ HV_INTERRUPT_TRIGGER_MODE_EDGE = 0, ++ HV_INTERRUPT_TRIGGER_MODE_LEVEL = 1, ++}; ++ ++struct hv_device_interrupt_descriptor { ++ u32 interrupt_type; ++ u32 trigger_mode; ++ u32 vector_count; ++ u32 reserved; ++ struct hv_device_interrupt_target target; ++} __packed; ++ ++struct hv_input_map_device_interrupt { ++ u64 partition_id; ++ u64 device_id; ++ u64 flags; ++ struct hv_interrupt_entry logical_interrupt_entry; ++ struct hv_device_interrupt_descriptor interrupt_descriptor; ++} __packed; ++ ++struct hv_output_map_device_interrupt { ++ struct hv_interrupt_entry interrupt_entry; ++} __packed; ++ ++struct hv_input_unmap_device_interrupt { ++ u64 partition_id; ++ u64 device_id; ++ struct hv_interrupt_entry interrupt_entry; ++} __packed; ++ ++#define HV_SOURCE_SHADOW_NONE 0x0 ++#define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1 ++ + #endif +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:30 +0000 +Subject: [PATCH 12/53] asm-generic/hyperv: update hv_msi_entry + +We will soon need to access fields inside the MSI address and MSI data +fields. Introduce hv_msi_address_register and hv_msi_data_register. 
+ +Fix up one user of hv_msi_entry in mshyperv.h. + +No functional change expected. + +Signed-off-by: Wei Liu +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-12-wei.liu@kernel.org +(cherry picked from commit d589ae61bc27b2b9aaac0bf20a9077b6fbda32b6) +--- + arch/x86/include/asm/mshyperv.h | 4 ++-- + include/asm-generic/hyperv-tlfs.h | 28 ++++++++++++++++++++++++++-- + 2 files changed, 28 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index f9119781f2bb..7bd4022da061 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -259,8 +259,8 @@ static inline void hv_apic_init(void) {} + static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, + struct msi_desc *msi_desc) + { +- msi_entry->address = msi_desc->msg.address_lo; +- msi_entry->data = msi_desc->msg.data; ++ msi_entry->address.as_uint32 = msi_desc->msg.address_lo; ++ msi_entry->data.as_uint32 = msi_desc->msg.data; + } + + #else /* CONFIG_HYPERV */ +diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h +index b6ed949be3bf..2040b196fe59 100644 +--- a/include/asm-generic/hyperv-tlfs.h ++++ b/include/asm-generic/hyperv-tlfs.h +@@ -483,12 +483,36 @@ struct hv_create_vp { + u64 flags; + } __packed; + ++union hv_msi_address_register { ++ u32 as_uint32; ++ struct { ++ u32 reserved1:2; ++ u32 destination_mode:1; ++ u32 redirection_hint:1; ++ u32 reserved2:8; ++ u32 destination_id:8; ++ u32 msi_base:12; ++ }; ++} __packed; ++ ++union hv_msi_data_register { ++ u32 as_uint32; ++ struct { ++ u32 vector:8; ++ u32 delivery_mode:3; ++ u32 reserved1:3; ++ u32 level_assert:1; ++ u32 trigger_mode:1; ++ u32 reserved2:16; ++ }; ++} __packed; ++ + /* HvRetargetDeviceInterrupt hypercall */ + union hv_msi_entry { + u64 as_uint64; + struct { +- u32 address; +- u32 data; ++ union hv_msi_address_register address; ++ union hv_msi_data_register data; + } 
__packed; + }; + +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:31 +0000 +Subject: [PATCH 13/53] asm-generic/hyperv: update hv_interrupt_entry + +We will soon use the same structure to handle IO-APIC interrupts as +well. Introduce an enum to identify the source and a data structure for +IO-APIC RTE. + +While at it, update pci-hyperv.c to use the enum. + +No functional change. + +Signed-off-by: Wei Liu +Acked-by: Rob Herring +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-13-wei.liu@kernel.org +(cherry picked from commit b59fb7b60d47b2af3a114daf0ae198aa23921698) +--- + drivers/pci/controller/pci-hyperv.c | 2 +- + include/asm-generic/hyperv-tlfs.h | 36 +++++++++++++++++++++++++++-- + 2 files changed, 35 insertions(+), 3 deletions(-) + +diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c +index ad3e3cde1c20..7fd8cd554675 100644 +--- a/drivers/pci/controller/pci-hyperv.c ++++ b/drivers/pci/controller/pci-hyperv.c +@@ -1215,7 +1215,7 @@ static void hv_irq_unmask(struct irq_data *data) + params = &hbus->retarget_msi_interrupt_params; + memset(params, 0, sizeof(*params)); + params->partition_id = HV_PARTITION_ID_SELF; +- params->int_entry.source = 1; /* MSI(-X) */ ++ params->int_entry.source = HV_INTERRUPT_SOURCE_MSI; + hv_set_msi_entry_from_desc(&params->int_entry.msi_entry, msi_desc); + params->device_id = (hbus->hdev->dev_instance.b[5] << 24) | + (hbus->hdev->dev_instance.b[4] << 16) | +diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h +index 2040b196fe59..83448e837ded 100644 +--- a/include/asm-generic/hyperv-tlfs.h ++++ b/include/asm-generic/hyperv-tlfs.h +@@ -483,6 +483,11 @@ struct hv_create_vp { + u64 flags; + } __packed; + ++enum hv_interrupt_source { ++ HV_INTERRUPT_SOURCE_MSI = 1, /* MSI and MSI-X */ ++ HV_INTERRUPT_SOURCE_IOAPIC, ++}; ++ + union hv_msi_address_register { + u32 
as_uint32; + struct { +@@ -516,10 +521,37 @@ union hv_msi_entry { + } __packed; + }; + ++union hv_ioapic_rte { ++ u64 as_uint64; ++ ++ struct { ++ u32 vector:8; ++ u32 delivery_mode:3; ++ u32 destination_mode:1; ++ u32 delivery_status:1; ++ u32 interrupt_polarity:1; ++ u32 remote_irr:1; ++ u32 trigger_mode:1; ++ u32 interrupt_mask:1; ++ u32 reserved1:15; ++ ++ u32 reserved2:24; ++ u32 destination_id:8; ++ }; ++ ++ struct { ++ u32 low_uint32; ++ u32 high_uint32; ++ }; ++} __packed; ++ + struct hv_interrupt_entry { +- u32 source; /* 1 for MSI(-X) */ ++ u32 source; + u32 reserved1; +- union hv_msi_entry msi_entry; ++ union { ++ union hv_msi_entry msi_entry; ++ union hv_ioapic_rte ioapic_rte; ++ }; + } __packed; + + /* +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:34 +0000 +Subject: [PATCH 14/53] x86/hyperv: implement an MSI domain for root partition + +When Linux runs as the root partition on Microsoft Hypervisor, its +interrupts are remapped. Linux will need to explicitly map and unmap +interrupts for hardware. + +Implement an MSI domain to issue the correct hypercalls. And initialize +this irq domain as the default MSI irq domain. 
+ +Signed-off-by: Sunil Muthuswamy +Co-Developed-by: Sunil Muthuswamy +Signed-off-by: Wei Liu +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-16-wei.liu@kernel.org +(cherry picked from commit e39397d1fd6851bef4dfb63a631b8e15d1f43329) +--- + arch/x86/hyperv/Makefile | 2 +- + arch/x86/hyperv/hv_init.c | 9 + + arch/x86/hyperv/irqdomain.c | 360 ++++++++++++++++++++++++++++++++ + arch/x86/include/asm/mshyperv.h | 2 + + 4 files changed, 372 insertions(+), 1 deletion(-) + create mode 100644 arch/x86/hyperv/irqdomain.c + +diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile +index 565358020921..48e2c51464e8 100644 +--- a/arch/x86/hyperv/Makefile ++++ b/arch/x86/hyperv/Makefile +@@ -1,5 +1,5 @@ + # SPDX-License-Identifier: GPL-2.0-only +-obj-y := hv_init.o mmu.o nested.o ++obj-y := hv_init.o mmu.o nested.o irqdomain.o + obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o + + ifdef CONFIG_X86_64 +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index aeea8fbf3c23..b81047dec1da 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -511,6 +511,15 @@ void __init hyperv_init(void) + + BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull); + ++#ifdef CONFIG_PCI_MSI ++ /* ++ * If we're running as root, we want to create our own PCI MSI domain. ++ * We can't set this in hv_pci_init because that would be too late. ++ */ ++ if (hv_root_partition) ++ x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain; ++#endif ++ + return; + + remove_cpuhp_state: +diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c +new file mode 100644 +index 000000000000..bddcf1d6860d +--- /dev/null ++++ b/arch/x86/hyperv/irqdomain.c +@@ -0,0 +1,360 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++/* ++ * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor. 
++ * ++ * Authors: ++ * Sunil Muthuswamy ++ * Wei Liu ++ */ ++ ++#include <linux/pci.h> ++#include <linux/irq.h> ++#include <asm/mshyperv.h> ++ ++static int hv_map_interrupt(union hv_device_id device_id, bool level, ++ int cpu, int vector, struct hv_interrupt_entry *entry) ++{ ++ struct hv_input_map_device_interrupt *input; ++ struct hv_output_map_device_interrupt *output; ++ struct hv_device_interrupt_descriptor *intr_desc; ++ unsigned long flags; ++ u64 status; ++ int nr_bank, var_size; ++ ++ local_irq_save(flags); ++ ++ input = *this_cpu_ptr(hyperv_pcpu_input_arg); ++ output = *this_cpu_ptr(hyperv_pcpu_output_arg); ++ ++ intr_desc = &input->interrupt_descriptor; ++ memset(input, 0, sizeof(*input)); ++ input->partition_id = hv_current_partition_id; ++ input->device_id = device_id.as_uint64; ++ intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED; ++ intr_desc->vector_count = 1; ++ intr_desc->target.vector = vector; ++ ++ if (level) ++ intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL; ++ else ++ intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE; ++ ++ intr_desc->target.vp_set.valid_bank_mask = 0; ++ intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K; ++ nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu)); ++ if (nr_bank < 0) { ++ local_irq_restore(flags); ++ pr_err("%s: unable to generate VP set\n", __func__); ++ return EINVAL; ++ } ++ intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; ++ ++ /* ++ * var-sized hypercall, var-size starts after vp_mask (thus ++ * vp_set.format does not count, but vp_set.valid_bank_mask ++ * does). 
++ */ ++ var_size = nr_bank + 1; ++ ++ status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size, ++ input, output); ++ *entry = output->interrupt_entry; ++ ++ local_irq_restore(flags); ++ ++ if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) ++ pr_err("%s: hypercall failed, status %lld\n", __func__, status); ++ ++ return status & HV_HYPERCALL_RESULT_MASK; ++} ++ ++static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) ++{ ++ unsigned long flags; ++ struct hv_input_unmap_device_interrupt *input; ++ struct hv_interrupt_entry *intr_entry; ++ u64 status; ++ ++ local_irq_save(flags); ++ input = *this_cpu_ptr(hyperv_pcpu_input_arg); ++ ++ memset(input, 0, sizeof(*input)); ++ intr_entry = &input->interrupt_entry; ++ input->partition_id = hv_current_partition_id; ++ input->device_id = id; ++ *intr_entry = *old_entry; ++ ++ status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL); ++ local_irq_restore(flags); ++ ++ return status & HV_HYPERCALL_RESULT_MASK; ++} ++ ++#ifdef CONFIG_PCI_MSI ++struct rid_data { ++ struct pci_dev *bridge; ++ u32 rid; ++}; ++ ++static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data) ++{ ++ struct rid_data *rd = data; ++ u8 bus = PCI_BUS_NUM(rd->rid); ++ ++ if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) { ++ rd->bridge = pdev; ++ rd->rid = alias; ++ } ++ ++ return 0; ++} ++ ++static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev) ++{ ++ union hv_device_id dev_id; ++ struct rid_data data = { ++ .bridge = NULL, ++ .rid = PCI_DEVID(dev->bus->number, dev->devfn) ++ }; ++ ++ pci_for_each_dma_alias(dev, get_rid_cb, &data); ++ ++ dev_id.as_uint64 = 0; ++ dev_id.device_type = HV_DEVICE_TYPE_PCI; ++ dev_id.pci.segment = pci_domain_nr(dev->bus); ++ ++ dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid); ++ dev_id.pci.bdf.device = PCI_SLOT(data.rid); ++ dev_id.pci.bdf.function = PCI_FUNC(data.rid); ++ dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE; ++ ++ if (data.bridge) { ++ 
int pos; ++ ++ /* ++ * Microsoft Hypervisor requires a bus range when the bridge is ++ * running in PCI-X mode. ++ * ++ * To distinguish conventional vs PCI-X bridge, we can check ++ * the bridge's PCI-X Secondary Status Register, Secondary Bus ++ * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge ++ * Specification Revision 1.0 5.2.2.1.3. ++ * ++ * Value zero means it is in conventional mode, otherwise it is ++ * in PCI-X mode. ++ */ ++ ++ pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX); ++ if (pos) { ++ u16 status; ++ ++ pci_read_config_word(data.bridge, pos + ++ PCI_X_BRIDGE_SSTATUS, &status); ++ ++ if (status & PCI_X_SSTATUS_FREQ) { ++ /* Non-zero, PCI-X mode */ ++ u8 sec_bus, sub_bus; ++ ++ dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE; ++ ++ pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus); ++ dev_id.pci.shadow_bus_range.secondary_bus = sec_bus; ++ pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus); ++ dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus; ++ } ++ } ++ } ++ ++ return dev_id; ++} ++ ++static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector, ++ struct hv_interrupt_entry *entry) ++{ ++ union hv_device_id device_id = hv_build_pci_dev_id(dev); ++ ++ return hv_map_interrupt(device_id, false, cpu, vector, entry); ++} ++ ++static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg) ++{ ++ /* High address is always 0 */ ++ msg->address_hi = 0; ++ msg->address_lo = entry->msi_entry.address.as_uint32; ++ msg->data = entry->msi_entry.data.as_uint32; ++} ++ ++static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry); ++static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) ++{ ++ struct msi_desc *msidesc; ++ struct pci_dev *dev; ++ struct hv_interrupt_entry out_entry, *stored_entry; ++ struct irq_cfg *cfg = irqd_cfg(data); ++ cpumask_t *affinity; ++ int cpu; ++ u64 status; ++ ++ msidesc = 
irq_data_get_msi_desc(data); ++ dev = msi_desc_to_pci_dev(msidesc); ++ ++ if (!cfg) { ++ pr_debug("%s: cfg is NULL", __func__); ++ return; ++ } ++ ++ affinity = irq_data_get_effective_affinity_mask(data); ++ cpu = cpumask_first_and(affinity, cpu_online_mask); ++ ++ if (data->chip_data) { ++ /* ++ * This interrupt is already mapped. Let's unmap first. ++ * ++ * We don't use retarget interrupt hypercalls here because ++ * Microsoft Hypervisor doens't allow root to change the vector ++ * or specify VPs outside of the set that is initially used ++ * during mapping. ++ */ ++ stored_entry = data->chip_data; ++ data->chip_data = NULL; ++ ++ status = hv_unmap_msi_interrupt(dev, stored_entry); ++ ++ kfree(stored_entry); ++ ++ if (status != HV_STATUS_SUCCESS) { ++ pr_debug("%s: failed to unmap, status %lld", __func__, status); ++ return; ++ } ++ } ++ ++ stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC); ++ if (!stored_entry) { ++ pr_debug("%s: failed to allocate chip data\n", __func__); ++ return; ++ } ++ ++ status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry); ++ if (status != HV_STATUS_SUCCESS) { ++ kfree(stored_entry); ++ return; ++ } ++ ++ *stored_entry = out_entry; ++ data->chip_data = stored_entry; ++ entry_to_msi_msg(&out_entry, msg); ++ ++ return; ++} ++ ++static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry) ++{ ++ return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry); ++} ++ ++static void hv_teardown_msi_irq_common(struct pci_dev *dev, struct msi_desc *msidesc, int irq) ++{ ++ u64 status; ++ struct hv_interrupt_entry old_entry; ++ struct irq_desc *desc; ++ struct irq_data *data; ++ struct msi_msg msg; ++ ++ desc = irq_to_desc(irq); ++ if (!desc) { ++ pr_debug("%s: no irq desc\n", __func__); ++ return; ++ } ++ ++ data = &desc->irq_data; ++ if (!data) { ++ pr_debug("%s: no irq data\n", __func__); ++ return; ++ } ++ ++ if (!data->chip_data) { ++ pr_debug("%s: no chip data\n!", __func__); ++ 
return; ++ } ++ ++ old_entry = *(struct hv_interrupt_entry *)data->chip_data; ++ entry_to_msi_msg(&old_entry, &msg); ++ ++ kfree(data->chip_data); ++ data->chip_data = NULL; ++ ++ status = hv_unmap_msi_interrupt(dev, &old_entry); ++ ++ if (status != HV_STATUS_SUCCESS) { ++ pr_err("%s: hypercall failed, status %lld\n", __func__, status); ++ return; ++ } ++} ++ ++static void hv_msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) ++{ ++ int i; ++ struct msi_desc *entry; ++ struct pci_dev *pdev; ++ ++ if (WARN_ON_ONCE(!dev_is_pci(dev))) ++ return; ++ ++ pdev = to_pci_dev(dev); ++ ++ for_each_pci_msi_entry(entry, pdev) { ++ if (entry->irq) { ++ for (i = 0; i < entry->nvec_used; i++) { ++ hv_teardown_msi_irq_common(pdev, entry, entry->irq + i); ++ irq_domain_free_irqs(entry->irq + i, 1); ++ } ++ } ++ } ++} ++ ++/* ++ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, ++ * which implement the MSI or MSI-X Capability Structure. ++ */ ++static struct irq_chip hv_pci_msi_controller = { ++ .name = "HV-PCI-MSI", ++ .irq_unmask = pci_msi_unmask_irq, ++ .irq_mask = pci_msi_mask_irq, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_compose_msi_msg = hv_irq_compose_msi_msg, ++ .irq_set_affinity = msi_domain_set_affinity, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++static struct msi_domain_ops pci_msi_domain_ops = { ++ .domain_free_irqs = hv_msi_domain_free_irqs, ++ .msi_prepare = pci_msi_prepare, ++}; ++ ++static struct msi_domain_info hv_pci_msi_domain_info = { ++ .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | ++ MSI_FLAG_PCI_MSIX, ++ .ops = &pci_msi_domain_ops, ++ .chip = &hv_pci_msi_controller, ++ .handler = handle_edge_irq, ++ .handler_name = "edge", ++}; ++ ++struct irq_domain * __init hv_create_pci_msi_domain(void) ++{ ++ struct irq_domain *d = NULL; ++ struct fwnode_handle *fn; ++ ++ fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI"); ++ if (fn) ++ d = pci_msi_create_irq_domain(fn, 
&hv_pci_msi_domain_info, x86_vector_domain); ++ ++ /* No point in going further if we can't get an irq domain */ ++ BUG_ON(!d); ++ ++ return d; ++} ++ ++#endif /* CONFIG_PCI_MSI */ +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index 7bd4022da061..4533773115ea 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -263,6 +263,8 @@ static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, + msi_entry->data.as_uint32 = msi_desc->msg.data; + } + ++struct irq_domain *hv_create_pci_msi_domain(void); ++ + #else /* CONFIG_HYPERV */ + static inline void hyperv_init(void) {} + static inline void hyperv_setup_mmu_ops(void) {} +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Wed, 3 Feb 2021 15:04:35 +0000 +Subject: [PATCH 15/53] iommu/hyperv: setup an IO-APIC IRQ remapping domain for + root partition + +Just like MSI/MSI-X, IO-APIC interrupts are remapped by Microsoft +Hypervisor when Linux runs as the root partition. Implement an IRQ +domain to handle mapping and unmapping of IO-APIC interrupts. 
+ +Signed-off-by: Wei Liu +Acked-by: Joerg Roedel +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210203150435.27941-17-wei.liu@kernel.org +(cherry picked from commit fb5ef35165a37ca63ef0227657eabd06f0a39cf9) +--- + arch/x86/hyperv/irqdomain.c | 25 +++++ + arch/x86/include/asm/mshyperv.h | 4 + + drivers/iommu/hyperv-iommu.c | 177 +++++++++++++++++++++++++++++++- + 3 files changed, 203 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c +index bddcf1d6860d..4421a8d92e23 100644 +--- a/arch/x86/hyperv/irqdomain.c ++++ b/arch/x86/hyperv/irqdomain.c +@@ -358,3 +358,28 @@ struct irq_domain * __init hv_create_pci_msi_domain(void) + } + + #endif /* CONFIG_PCI_MSI */ ++ ++int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry) ++{ ++ union hv_device_id device_id; ++ ++ device_id.as_uint64 = 0; ++ device_id.device_type = HV_DEVICE_TYPE_IOAPIC; ++ device_id.ioapic.ioapic_id = (u8)ioapic_id; ++ ++ return hv_unmap_interrupt(device_id.as_uint64, entry); ++} ++EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt); ++ ++int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector, ++ struct hv_interrupt_entry *entry) ++{ ++ union hv_device_id device_id; ++ ++ device_id.as_uint64 = 0; ++ device_id.device_type = HV_DEVICE_TYPE_IOAPIC; ++ device_id.ioapic.ioapic_id = (u8)ioapic_id; ++ ++ return hv_map_interrupt(device_id, level, cpu, vector, entry); ++} ++EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt); +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index 4533773115ea..ccf60a809a17 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -265,6 +265,10 @@ static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, + + struct irq_domain *hv_create_pci_msi_domain(void); + ++int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, ++ struct hv_interrupt_entry *entry); ++int 
hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); ++ + #else /* CONFIG_HYPERV */ + static inline void hyperv_init(void) {} + static inline void hyperv_setup_mmu_ops(void) {} +diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c +index e09e2d734c57..d08e5b9e4f0e 100644 +--- a/drivers/iommu/hyperv-iommu.c ++++ b/drivers/iommu/hyperv-iommu.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + #include "irq_remapping.h" + +@@ -137,29 +138,42 @@ static const struct irq_domain_ops hyperv_ir_domain_ops = { + .activate = hyperv_irq_remapping_activate, + }; + ++static const struct irq_domain_ops hyperv_root_ir_domain_ops; + static int __init hyperv_prepare_irq_remapping(void) + { + struct fwnode_handle *fn; + int i; ++ const char *name; ++ const struct irq_domain_ops *ops; + + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) || + !x2apic_supported()) + return -ENODEV; + +- fn = irq_domain_alloc_named_id_fwnode("HYPERV-IR", 0); ++ if (hv_root_partition) { ++ name = "HYPERV-ROOT-IR"; ++ ops = &hyperv_root_ir_domain_ops; ++ } else { ++ name = "HYPERV-IR"; ++ ops = &hyperv_ir_domain_ops; ++ } ++ ++ fn = irq_domain_alloc_named_id_fwnode(name, 0); + if (!fn) + return -ENOMEM; + + ioapic_ir_domain = + irq_domain_create_hierarchy(arch_get_ir_parent_domain(), +- 0, IOAPIC_REMAPPING_ENTRY, fn, +- &hyperv_ir_domain_ops, NULL); ++ 0, IOAPIC_REMAPPING_ENTRY, fn, ops, NULL); + + if (!ioapic_ir_domain) { + irq_domain_free_fwnode(fn); + return -ENOMEM; + } + ++ if (hv_root_partition) ++ return 0; /* The rest is only relevant to guests */ ++ + /* + * Hyper-V doesn't provide irq remapping function for + * IO-APIC and so IO-APIC only accepts 8-bit APIC ID. 
+@@ -196,4 +210,161 @@ struct irq_remap_ops hyperv_irq_remap_ops = { + .get_irq_domain = hyperv_get_irq_domain, + }; + ++/* IRQ remapping domain when Linux runs as the root partition */ ++struct hyperv_root_ir_data { ++ u8 ioapic_id; ++ bool is_level; ++ struct hv_interrupt_entry entry; ++}; ++ ++static void ++hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) ++{ ++ u64 status; ++ u32 vector; ++ struct irq_cfg *cfg; ++ int ioapic_id; ++ struct cpumask *affinity; ++ int cpu; ++ struct hv_interrupt_entry entry; ++ struct hyperv_root_ir_data *data = irq_data->chip_data; ++ struct IO_APIC_route_entry e; ++ ++ cfg = irqd_cfg(irq_data); ++ affinity = irq_data_get_effective_affinity_mask(irq_data); ++ cpu = cpumask_first_and(affinity, cpu_online_mask); ++ ++ vector = cfg->vector; ++ ioapic_id = data->ioapic_id; ++ ++ if (data->entry.source == HV_DEVICE_TYPE_IOAPIC ++ && data->entry.ioapic_rte.as_uint64) { ++ entry = data->entry; ++ ++ status = hv_unmap_ioapic_interrupt(ioapic_id, &entry); ++ ++ if (status != HV_STATUS_SUCCESS) ++ pr_debug("%s: unexpected unmap status %lld\n", __func__, status); ++ ++ data->entry.ioapic_rte.as_uint64 = 0; ++ data->entry.source = 0; /* Invalid source */ ++ } ++ ++ ++ status = hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu, ++ vector, &entry); ++ ++ if (status != HV_STATUS_SUCCESS) { ++ pr_err("%s: map hypercall failed, status %lld\n", __func__, status); ++ return; ++ } ++ ++ data->entry = entry; ++ ++ /* Turn it into an IO_APIC_route_entry, and generate MSI MSG. 
*/ ++ e.w1 = entry.ioapic_rte.low_uint32; ++ e.w2 = entry.ioapic_rte.high_uint32; ++ ++ memset(msg, 0, sizeof(*msg)); ++ msg->arch_data.vector = e.vector; ++ msg->arch_data.delivery_mode = e.delivery_mode; ++ msg->arch_addr_lo.dest_mode_logical = e.dest_mode_logical; ++ msg->arch_addr_lo.dmar_format = e.ir_format; ++ msg->arch_addr_lo.dmar_index_0_14 = e.ir_index_0_14; ++} ++ ++static int hyperv_root_ir_set_affinity(struct irq_data *data, ++ const struct cpumask *mask, bool force) ++{ ++ struct irq_data *parent = data->parent_data; ++ struct irq_cfg *cfg = irqd_cfg(data); ++ int ret; ++ ++ ret = parent->chip->irq_set_affinity(parent, mask, force); ++ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) ++ return ret; ++ ++ send_cleanup_vector(cfg); ++ ++ return 0; ++} ++ ++static struct irq_chip hyperv_root_ir_chip = { ++ .name = "HYPERV-ROOT-IR", ++ .irq_ack = apic_ack_irq, ++ .irq_set_affinity = hyperv_root_ir_set_affinity, ++ .irq_compose_msi_msg = hyperv_root_ir_compose_msi_msg, ++}; ++ ++static int hyperv_root_irq_remapping_alloc(struct irq_domain *domain, ++ unsigned int virq, unsigned int nr_irqs, ++ void *arg) ++{ ++ struct irq_alloc_info *info = arg; ++ struct irq_data *irq_data; ++ struct hyperv_root_ir_data *data; ++ int ret = 0; ++ ++ if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1) ++ return -EINVAL; ++ ++ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); ++ if (ret < 0) ++ return ret; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) { ++ irq_domain_free_irqs_common(domain, virq, nr_irqs); ++ return -ENOMEM; ++ } ++ ++ irq_data = irq_domain_get_irq_data(domain, virq); ++ if (!irq_data) { ++ kfree(data); ++ irq_domain_free_irqs_common(domain, virq, nr_irqs); ++ return -EINVAL; ++ } ++ ++ data->ioapic_id = info->devid; ++ data->is_level = info->ioapic.is_level; ++ ++ irq_data->chip = &hyperv_root_ir_chip; ++ irq_data->chip_data = data; ++ ++ return 0; ++} ++ ++static void hyperv_root_irq_remapping_free(struct 
irq_domain *domain, ++ unsigned int virq, unsigned int nr_irqs) ++{ ++ struct irq_data *irq_data; ++ struct hyperv_root_ir_data *data; ++ struct hv_interrupt_entry *e; ++ int i; ++ ++ for (i = 0; i < nr_irqs; i++) { ++ irq_data = irq_domain_get_irq_data(domain, virq + i); ++ ++ if (irq_data && irq_data->chip_data) { ++ data = irq_data->chip_data; ++ e = &data->entry; ++ ++ if (e->source == HV_DEVICE_TYPE_IOAPIC ++ && e->ioapic_rte.as_uint64) ++ hv_unmap_ioapic_interrupt(data->ioapic_id, ++ &data->entry); ++ ++ kfree(data); ++ } ++ } ++ ++ irq_domain_free_irqs_common(domain, virq, nr_irqs); ++} ++ ++static const struct irq_domain_ops hyperv_root_ir_domain_ops = { ++ .select = hyperv_irq_remapping_select, ++ .alloc = hyperv_root_irq_remapping_alloc, ++ .free = hyperv_root_irq_remapping_free, ++}; ++ + #endif +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Tue, 2 Mar 2021 13:38:13 -0800 +Subject: [PATCH 16/53] Drivers: hv: vmbus: Move Hyper-V page allocator to arch + neutral code + +The Hyper-V page allocator functions are implemented in an architecture +neutral way. Move them into the architecture neutral VMbus module so +a separate implementation for ARM64 is not needed. + +No functional change. 
+ +Signed-off-by: Michael Kelley +Reviewed-by: Boqun Feng +Link: https://lore.kernel.org/r/1614721102-2241-2-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit ca48739e59df31d16c27dbcd9ea2ea61d7caa9fb) +--- + arch/x86/hyperv/hv_init.c | 22 -------------------- + arch/x86/include/asm/mshyperv.h | 5 ----- + drivers/hv/hv.c | 36 +++++++++++++++++++++++++++++++++ + include/asm-generic/mshyperv.h | 4 ++++ + 4 files changed, 40 insertions(+), 27 deletions(-) + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index b81047dec1da..4bdb3443b25e 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -54,28 +54,6 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); + u32 hv_max_vp_index; + EXPORT_SYMBOL_GPL(hv_max_vp_index); + +-void *hv_alloc_hyperv_page(void) +-{ +- BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE); +- +- return (void *)__get_free_page(GFP_KERNEL); +-} +-EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); +- +-void *hv_alloc_hyperv_zeroed_page(void) +-{ +- BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE); +- +- return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); +-} +-EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page); +- +-void hv_free_hyperv_page(unsigned long addr) +-{ +- free_page(addr); +-} +-EXPORT_SYMBOL_GPL(hv_free_hyperv_page); +- + static int hv_cpu_init(unsigned int cpu) + { + u64 msr_vp_index; +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index ccf60a809a17..ef6e968e2828 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -233,9 +233,6 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) + + void __init hyperv_init(void); + void hyperv_setup_mmu_ops(void); +-void *hv_alloc_hyperv_page(void); +-void *hv_alloc_hyperv_zeroed_page(void); +-void hv_free_hyperv_page(unsigned long addr); + void set_hv_tscchange_cb(void (*cb)(void)); + void clear_hv_tscchange_cb(void); + void 
hyperv_stop_tsc_emulation(void); +@@ -272,8 +269,6 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); + #else /* CONFIG_HYPERV */ + static inline void hyperv_init(void) {} + static inline void hyperv_setup_mmu_ops(void) {} +-static inline void *hv_alloc_hyperv_page(void) { return NULL; } +-static inline void hv_free_hyperv_page(unsigned long addr) {} + static inline void set_hv_tscchange_cb(void (*cb)(void)) {} + static inline void clear_hv_tscchange_cb(void) {} + static inline void hyperv_stop_tsc_emulation(void) {}; +diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c +index f202ac7f4b3d..cca8d5ea61f0 100644 +--- a/drivers/hv/hv.c ++++ b/drivers/hv/hv.c +@@ -36,6 +36,42 @@ int hv_init(void) + return 0; + } + ++/* ++ * Functions for allocating and freeing memory with size and ++ * alignment HV_HYP_PAGE_SIZE. These functions are needed because ++ * the guest page size may not be the same as the Hyper-V page ++ * size. We depend upon kmalloc() aligning power-of-two size ++ * allocations to the allocation size boundary, so that the ++ * allocated memory appears to Hyper-V as a page of the size ++ * it expects. ++ */ ++ ++void *hv_alloc_hyperv_page(void) ++{ ++ BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); ++ ++ if (PAGE_SIZE == HV_HYP_PAGE_SIZE) ++ return (void *)__get_free_page(GFP_KERNEL); ++ else ++ return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); ++} ++ ++void *hv_alloc_hyperv_zeroed_page(void) ++{ ++ if (PAGE_SIZE == HV_HYP_PAGE_SIZE) ++ return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); ++ else ++ return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); ++} ++ ++void hv_free_hyperv_page(unsigned long addr) ++{ ++ if (PAGE_SIZE == HV_HYP_PAGE_SIZE) ++ free_page(addr); ++ else ++ kfree((void *)addr); ++} ++ + /* + * hv_post_message - Post a message using the hypervisor message IPC. 
+ * +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +index dff58a3db5d5..694b5bc3561c 100644 +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -117,6 +117,10 @@ extern u32 hv_max_vp_index; + /* Sentinel value for an uninitialized entry in hv_vp_index array */ + #define VP_INVAL U32_MAX + ++void *hv_alloc_hyperv_page(void); ++void *hv_alloc_hyperv_zeroed_page(void); ++void hv_free_hyperv_page(unsigned long addr); ++ + /** + * hv_cpu_number_to_vp_number() - Map CPU to VP. + * @cpu_number: CPU number in Linux terms +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Tue, 2 Mar 2021 13:38:14 -0800 +Subject: [PATCH 17/53] x86/hyper-v: Move hv_message_type to architecture + neutral module + +The definition of enum hv_message_type includes arch neutral and +x86/x64-specific values. Ideally there would be a way to put the +arch neutral values in an arch neutral module, and the arch +specific values in an arch specific module. But C doesn't provide +a way to extend enum types. As a compromise, move the entire +definition into an arch neutral module, to avoid duplicating the +arch neutral values for x86/x64 and for ARM64. + +No functional change. 
+ +Signed-off-by: Michael Kelley +Reviewed-by: Boqun Feng +Link: https://lore.kernel.org/r/1614721102-2241-3-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 5e4e6ddf8d74068fd6bb7922dabcfa2c0f506c39) +--- + arch/x86/include/asm/hyperv-tlfs.h | 29 ------------------------- + include/asm-generic/hyperv-tlfs.h | 35 ++++++++++++++++++++++++++++++ + 2 files changed, 35 insertions(+), 29 deletions(-) + +diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h +index 19ac499ab251..119cc587775a 100644 +--- a/arch/x86/include/asm/hyperv-tlfs.h ++++ b/arch/x86/include/asm/hyperv-tlfs.h +@@ -281,35 +281,6 @@ struct hv_tsc_emulation_status { + #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 + #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 + +- +-/* Define hypervisor message types. */ +-enum hv_message_type { +- HVMSG_NONE = 0x00000000, +- +- /* Memory access messages. */ +- HVMSG_UNMAPPED_GPA = 0x80000000, +- HVMSG_GPA_INTERCEPT = 0x80000001, +- +- /* Timer notification messages. */ +- HVMSG_TIMER_EXPIRED = 0x80000010, +- +- /* Error messages. */ +- HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, +- HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, +- HVMSG_UNSUPPORTED_FEATURE = 0x80000022, +- +- /* Trace buffer complete messages. */ +- HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, +- +- /* Platform-specific processor intercept messages. 
*/ +- HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, +- HVMSG_X64_MSR_INTERCEPT = 0x80010001, +- HVMSG_X64_CPUID_INTERCEPT = 0x80010002, +- HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, +- HVMSG_X64_APIC_EOI = 0x80010004, +- HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 +-}; +- + struct hv_nested_enlightenments_control { + struct { + __u32 directhypercall:1; +diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h +index 83448e837ded..9cf10837d005 100644 +--- a/include/asm-generic/hyperv-tlfs.h ++++ b/include/asm-generic/hyperv-tlfs.h +@@ -220,6 +220,41 @@ enum HV_GENERIC_SET_FORMAT { + #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) + #define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) + ++/* ++ * Define hypervisor message types. Some of the message types ++ * are x86/x64 specific, but there's no good way to separate ++ * them out into the arch-specific version of hyperv-tlfs.h ++ * because C doesn't provide a way to extend enum types. ++ * Keeping them all in the arch neutral hyperv-tlfs.h seems ++ * the least messy compromise. ++ */ ++enum hv_message_type { ++ HVMSG_NONE = 0x00000000, ++ ++ /* Memory access messages. */ ++ HVMSG_UNMAPPED_GPA = 0x80000000, ++ HVMSG_GPA_INTERCEPT = 0x80000001, ++ ++ /* Timer notification messages. */ ++ HVMSG_TIMER_EXPIRED = 0x80000010, ++ ++ /* Error messages. */ ++ HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, ++ HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, ++ HVMSG_UNSUPPORTED_FEATURE = 0x80000022, ++ ++ /* Trace buffer complete messages. */ ++ HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, ++ ++ /* Platform-specific processor intercept messages. */ ++ HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, ++ HVMSG_X64_MSR_INTERCEPT = 0x80010001, ++ HVMSG_X64_CPUID_INTERCEPT = 0x80010002, ++ HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, ++ HVMSG_X64_APIC_EOI = 0x80010004, ++ HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 ++}; ++ + /* Define synthetic interrupt controller message flags. 
*/ + union hv_message_flags { + __u8 asu8; +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Tue, 2 Mar 2021 13:38:20 -0800 +Subject: [PATCH 18/53] clocksource/drivers/hyper-v: Handle sched_clock + differences inline + +While the Hyper-V Reference TSC code is architecture neutral, the +pv_ops.time.sched_clock() function is implemented for x86/x64, but not +for ARM64. Current code calls a utility function under arch/x86 (and +coming, under arch/arm64) to handle the difference. + +Change this approach to handle the difference inline based on whether +GENERIC_SCHED_CLOCK is present. The new approach removes code under +arch/* since the difference is tied more to the specifics of the Linux +implementation than to the architecture. + +No functional change. + +Signed-off-by: Michael Kelley +Reviewed-by: Boqun Feng +Acked-by: Daniel Lezcano +Link: https://lore.kernel.org/r/1614721102-2241-9-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit eb3e1d370b4c57be1acbb9de51a7deaa036eff4b) +--- + arch/x86/include/asm/mshyperv.h | 11 ----------- + drivers/clocksource/hyperv_timer.c | 24 ++++++++++++++++++++++++ + 2 files changed, 24 insertions(+), 11 deletions(-) + +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index ef6e968e2828..212d34f80bb4 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -56,17 +56,6 @@ typedef int (*hyperv_fill_flush_list_func)( + #define hv_get_raw_timer() rdtsc_ordered() + #define hv_get_vector() HYPERVISOR_CALLBACK_VECTOR + +-/* +- * Reference to pv_ops must be inline so objtool +- * detection of noinstr violations can work correctly. 
+- */ +-static __always_inline void hv_setup_sched_clock(void *sched_clock) +-{ +-#ifdef CONFIG_PARAVIRT +- pv_ops.time.sched_clock = sched_clock; +-#endif +-} +- + void hyperv_vector_handler(struct pt_regs *regs); + + static inline void hv_enable_stimer0_percpu_irq(int irq) {} +diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c +index 269a691bd2c4..cabd8453461f 100644 +--- a/drivers/clocksource/hyperv_timer.c ++++ b/drivers/clocksource/hyperv_timer.c +@@ -418,6 +418,30 @@ static struct clocksource hyperv_cs_msr = { + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + }; + ++/* ++ * Reference to pv_ops must be inline so objtool ++ * detection of noinstr violations can work correctly. ++ */ ++#ifdef CONFIG_GENERIC_SCHED_CLOCK ++static __always_inline void hv_setup_sched_clock(void *sched_clock) ++{ ++ /* ++ * We're on an architecture with generic sched clock (not x86/x64). ++ * The Hyper-V sched clock read function returns nanoseconds, not ++ * the normal 100ns units of the Hyper-V synthetic clock. ++ */ ++ sched_clock_register(sched_clock, 64, NSEC_PER_SEC); ++} ++#elif defined CONFIG_PARAVIRT ++static __always_inline void hv_setup_sched_clock(void *sched_clock) ++{ ++ /* We're on x86/x64 *and* using PV ops */ ++ pv_ops.time.sched_clock = sched_clock; ++} ++#else /* !CONFIG_GENERIC_SCHED_CLOCK && !CONFIG_PARAVIRT */ ++static __always_inline void hv_setup_sched_clock(void *sched_clock) {} ++#endif /* CONFIG_GENERIC_SCHED_CLOCK */ ++ + static bool __init hv_init_tsc_clocksource(void) + { + u64 tsc_msr; +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Tue, 2 Mar 2021 13:38:16 -0800 +Subject: [PATCH 19/53] Drivers: hv: vmbus: Move hyperv_report_panic_msg to + arch neutral code + +With the new Hyper-V MSR set function, hyperv_report_panic_msg() can be +architecture neutral, so move it out from under arch/x86 and merge into +hv_kmsg_dump(). 
This move also avoids needing a separate implementation +under arch/arm64. + +No functional change. + +Signed-off-by: Michael Kelley +Reviewed-by: Boqun Feng +Link: https://lore.kernel.org/r/1614721102-2241-5-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit b548a7742791e7818bc2780b2354b9714fd8f8d9) +--- + arch/x86/hyperv/hv_init.c | 27 --------------------------- + drivers/hv/vmbus_drv.c | 24 +++++++++++++++++++----- + include/asm-generic/mshyperv.h | 1 - + 3 files changed, 19 insertions(+), 33 deletions(-) + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index 4bdb3443b25e..f5a8f057ed89 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -571,33 +571,6 @@ void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) + } + EXPORT_SYMBOL_GPL(hyperv_report_panic); + +-/** +- * hyperv_report_panic_msg - report panic message to Hyper-V +- * @pa: physical address of the panic page containing the message +- * @size: size of the message in the page +- */ +-void hyperv_report_panic_msg(phys_addr_t pa, size_t size) +-{ +- /* +- * P3 to contain the physical address of the panic page & P4 to +- * contain the size of the panic data in that page. Rest of the +- * registers are no-op when the NOTIFY_MSG flag is set. +- */ +- wrmsrl(HV_X64_MSR_CRASH_P0, 0); +- wrmsrl(HV_X64_MSR_CRASH_P1, 0); +- wrmsrl(HV_X64_MSR_CRASH_P2, 0); +- wrmsrl(HV_X64_MSR_CRASH_P3, pa); +- wrmsrl(HV_X64_MSR_CRASH_P4, size); +- +- /* +- * Let Hyper-V know there is crash data available along with +- * the panic message. 
+- */ +- wrmsrl(HV_X64_MSR_CRASH_CTL, +- (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG)); +-} +-EXPORT_SYMBOL_GPL(hyperv_report_panic_msg); +- + bool hv_is_hyperv_initialized(void) + { + union hv_x64_msr_hypercall_contents hypercall_msr; +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index a5a402e776c7..4f9a1d12aa88 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -1362,22 +1362,36 @@ static void hv_kmsg_dump(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason) + { + size_t bytes_written; +- phys_addr_t panic_pa; + + /* We are only interested in panics. */ + if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg)) + return; + +- panic_pa = virt_to_phys(hv_panic_page); +- + /* + * Write dump contents to the page. No need to synchronize; panic should + * be single-threaded. + */ + kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE, + &bytes_written); +- if (bytes_written) +- hyperv_report_panic_msg(panic_pa, bytes_written); ++ if (!bytes_written) ++ return; ++ /* ++ * P3 to contain the physical address of the panic page & P4 to ++ * contain the size of the panic data in that page. Rest of the ++ * registers are no-op when the NOTIFY_MSG flag is set. ++ */ ++ hv_set_register(HV_REGISTER_CRASH_P0, 0); ++ hv_set_register(HV_REGISTER_CRASH_P1, 0); ++ hv_set_register(HV_REGISTER_CRASH_P2, 0); ++ hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page)); ++ hv_set_register(HV_REGISTER_CRASH_P4, bytes_written); ++ ++ /* ++ * Let Hyper-V know there is crash data available along with ++ * the panic message. 
++ */ ++ hv_set_register(HV_REGISTER_CRASH_CTL, ++ (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG)); + } + + static struct kmsg_dumper hv_kmsg_dumper = { +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +index 694b5bc3561c..bbc011390cbb 100644 +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -173,7 +173,6 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, + } + + void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die); +-void hyperv_report_panic_msg(phys_addr_t pa, size_t size); + bool hv_is_hyperv_initialized(void); + bool hv_is_hibernation_supported(void); + enum hv_isolation_type hv_get_isolation_type(void); +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Tue, 2 Mar 2021 13:38:21 -0800 +Subject: [PATCH 20/53] clocksource/drivers/hyper-v: Set clocksource rating + based on Hyper-V feature + +On x86/x64, the TSC clocksource is available in a Hyper-V VM only if +Hyper-V provides the TSC_INVARIANT flag. The rating on the Hyper-V +Reference TSC page clocksource is currently set so that it will not +override the TSC clocksource in this case. Alternatively, if the TSC +clocksource is not available, then the Hyper-V clocksource is used. + +But on ARM64, the Hyper-V Reference TSC page clocksource should +override the ARM arch counter, since the Hyper-V clocksource provides +scaling and offsetting during live migrations that is not provided +for the ARM arch counter. + +To get the needed behavior for both x86/x64 and ARM64, tweak the +logic by defaulting the Hyper-V Reference TSC page clocksource +rating to a large value that will always override. If the Hyper-V +TSC_INVARIANT flag is set, then reduce the rating so that it will not +override the TSC. + +While the logic for getting there is slightly different, the net +result in the normal cases is no functional change. 
+ +Signed-off-by: Michael Kelley +Acked-by: Daniel Lezcano +Link: https://lore.kernel.org/r/1614721102-2241-10-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 4c78738ead4e195c7032c31fe56135c1b00e1784) +--- + drivers/clocksource/hyperv_timer.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c +index cabd8453461f..c97e1b1e6653 100644 +--- a/drivers/clocksource/hyperv_timer.c ++++ b/drivers/clocksource/hyperv_timer.c +@@ -302,14 +302,6 @@ EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); + * the other that uses the TSC reference page feature as defined in the + * TLFS. The MSR version is for compatibility with old versions of + * Hyper-V and 32-bit x86. The TSC reference page version is preferred. +- * +- * The Hyper-V clocksource ratings of 250 are chosen to be below the +- * TSC clocksource rating of 300. In configurations where Hyper-V offers +- * an InvariantTSC, the TSC is not marked "unstable", so the TSC clocksource +- * is available and preferred. With the higher rating, it will be the +- * default. On older hardware and Hyper-V versions, the TSC is marked +- * "unstable", so no TSC clocksource is created and the selected Hyper-V +- * clocksource will be the default. 
+ */ + + u64 (*hv_read_reference_counter)(void); +@@ -378,7 +370,7 @@ static int hv_cs_enable(struct clocksource *cs) + + static struct clocksource hyperv_cs_tsc = { + .name = "hyperv_clocksource_tsc_page", +- .rating = 250, ++ .rating = 500, + .read = read_hv_clock_tsc_cs, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +@@ -412,7 +404,7 @@ static u64 notrace read_hv_sched_clock_msr(void) + + static struct clocksource hyperv_cs_msr = { + .name = "hyperv_clocksource_msr", +- .rating = 250, ++ .rating = 500, + .read = read_hv_clock_msr_cs, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +@@ -453,6 +445,17 @@ static bool __init hv_init_tsc_clocksource(void) + if (hv_root_partition) + return false; + ++ /* ++ * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly ++ * handles frequency and offset changes due to live migration, ++ * pause/resume, and other VM management operations. So lower the ++ * Hyper-V Reference TSC rating, causing the generic TSC to be used. ++ * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference ++ * TSC will be preferred over the virtualized ARM64 arch counter. ++ */ ++ if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) ++ hyperv_cs_tsc.rating = 250; ++ + hv_read_reference_counter = read_hv_clock_tsc; + phys_addr = virt_to_phys(hv_get_tsc_page()); + +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Zheng Yongjun +Date: Fri, 26 Mar 2021 14:49:42 +0800 +Subject: [PATCH 21/53] x86/hyperv: remove unused linux/version.h header + +That header is not needed in hv_proc.c. 
+ +Reported-by: Hulk Robot +Signed-off-by: Yongjun Zheng +Link: https://lore.kernel.org/r/20210326064942.3263776-1-zhengyongjun3@huawei.com +Signed-off-by: Wei Liu +(cherry picked from commit 90b9bfa4707c85c02cc1b22b57bc8abc24a6a5f0) +--- + arch/x86/hyperv/hv_proc.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c +index 60461e598239..27e17ad3ba49 100644 +--- a/arch/x86/hyperv/hv_proc.c ++++ b/arch/x86/hyperv/hv_proc.c +@@ -1,6 +1,5 @@ + // SPDX-License-Identifier: GPL-2.0 + #include +-#include + #include + #include + #include +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Sunil Muthuswamy +Date: Tue, 23 Mar 2021 18:47:16 +0000 +Subject: [PATCH 22/53] x86/Hyper-V: Support for free page reporting + +Linux has support for free page reporting now (36e66c554b5c) for +virtualized environment. On Hyper-V when virtually backed VMs are +configured, Hyper-V will advertise cold memory discard capability, +when supported. This patch adds the support to hook into the free +page reporting infrastructure and leverage the Hyper-V cold memory +discard hint hypercall to report/free these pages back to the host. 
+ +Signed-off-by: Sunil Muthuswamy +Tested-by: Matheus Castello +Reviewed-by: Michael Kelley +Tested-by: Nathan Chancellor +Link: https://lore.kernel.org/r/SN4PR2101MB0880121FA4E2FEC67F35C1DCC0649@SN4PR2101MB0880.namprd21.prod.outlook.com +Signed-off-by: Wei Liu +(cherry picked from commit 6dc2a774cb4fdb524b7eb0b8db74198a1b4815ea) +--- + arch/x86/hyperv/hv_init.c | 51 +++++++++++++++++- + arch/x86/kernel/cpu/mshyperv.c | 9 ++-- + drivers/hv/Kconfig | 1 + + drivers/hv/hv_balloon.c | 89 +++++++++++++++++++++++++++++++ + include/asm-generic/hyperv-tlfs.h | 35 +++++++++++- + include/asm-generic/mshyperv.h | 3 +- + 6 files changed, 180 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index f5a8f057ed89..ea81e5608e55 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -498,6 +498,8 @@ void __init hyperv_init(void) + x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain; + #endif + ++ /* Query the VM's extended capability once, so that it can be cached. */ ++ hv_query_ext_cap(0); + return; + + remove_cpuhp_state: +@@ -601,7 +603,7 @@ EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); + + enum hv_isolation_type hv_get_isolation_type(void) + { +- if (!(ms_hyperv.features_b & HV_ISOLATION)) ++ if (!(ms_hyperv.priv_high & HV_ISOLATION)) + return HV_ISOLATION_TYPE_NONE; + return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); + } +@@ -612,3 +614,50 @@ bool hv_is_isolation_supported(void) + return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; + } + EXPORT_SYMBOL_GPL(hv_is_isolation_supported); ++ ++/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */ ++bool hv_query_ext_cap(u64 cap_query) ++{ ++ /* ++ * The address of the 'hv_extended_cap' variable will be used as an ++ * output parameter to the hypercall below and so it should be ++ * compatible with 'virt_to_phys'. Which means, its address should be ++ * directly mapped.
Use 'static' to keep it compatible; stack variables ++ * can be virtually mapped, making them incompatible with ++ * 'virt_to_phys'. ++ * Hypercall input/output addresses should also be 8-byte aligned. ++ */ ++ static u64 hv_extended_cap __aligned(8); ++ static bool hv_extended_cap_queried; ++ u64 status; ++ ++ /* ++ * Querying extended capabilities is an extended hypercall. Check if the ++ * partition supports extended hypercall, first. ++ */ ++ if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS)) ++ return false; ++ ++ /* Extended capabilities do not change at runtime. */ ++ if (hv_extended_cap_queried) ++ return hv_extended_cap & cap_query; ++ ++ status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL, ++ &hv_extended_cap); ++ ++ /* ++ * The query extended capabilities hypercall should not fail under ++ * any normal circumstances. Avoid repeatedly making the hypercall, on ++ * error. ++ */ ++ hv_extended_cap_queried = true; ++ status &= HV_HYPERCALL_RESULT_MASK; ++ if (status != HV_STATUS_SUCCESS) { ++ pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n", ++ status); ++ return false; ++ } ++ ++ return hv_extended_cap & cap_query; ++} ++EXPORT_SYMBOL_GPL(hv_query_ext_cap); +diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c +index 9fde437f53ea..16a5a901cde3 100644 +--- a/arch/x86/kernel/cpu/mshyperv.c ++++ b/arch/x86/kernel/cpu/mshyperv.c +@@ -274,12 +274,13 @@ static void __init ms_hyperv_init_platform(void) + * Extract the features and hints + */ + ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); +- ms_hyperv.features_b = cpuid_ebx(HYPERV_CPUID_FEATURES); ++ ms_hyperv.priv_high = cpuid_ebx(HYPERV_CPUID_FEATURES); + ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); + ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); + +- pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n", +- ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features); ++ pr_info("Hyper-V: privilege flags
low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n", ++ ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints, ++ ms_hyperv.misc_features); + + ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); + ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); +@@ -325,7 +326,7 @@ static void __init ms_hyperv_init_platform(void) + x86_platform.calibrate_cpu = hv_get_tsc_khz; + } + +- if (ms_hyperv.features_b & HV_ISOLATION) { ++ if (ms_hyperv.priv_high & HV_ISOLATION) { + ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG); + ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG); + +diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig +index 79e5356a737a..66c794d92391 100644 +--- a/drivers/hv/Kconfig ++++ b/drivers/hv/Kconfig +@@ -23,6 +23,7 @@ config HYPERV_UTILS + config HYPERV_BALLOON + tristate "Microsoft Hyper-V Balloon driver" + depends on HYPERV ++ select PAGE_REPORTING + help + Select this option to enable Hyper-V Balloon driver. + +diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c +index eb56e09ae15f..61b995ff00e3 100644 +--- a/drivers/hv/hv_balloon.c ++++ b/drivers/hv/hv_balloon.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -563,6 +564,8 @@ struct hv_dynmem_device { + * The negotiated version agreed by host. 
+ */ + __u32 version; ++ ++ struct page_reporting_dev_info pr_dev_info; + }; + + static struct hv_dynmem_device dm_device; +@@ -1565,6 +1568,89 @@ static void balloon_onchannelcallback(void *context) + + } + ++/* Hyper-V only supports reporting 2MB pages or higher */ ++#define HV_MIN_PAGE_REPORTING_ORDER 9 ++#define HV_MIN_PAGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << HV_MIN_PAGE_REPORTING_ORDER) ++static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, ++ struct scatterlist *sgl, unsigned int nents) ++{ ++ unsigned long flags; ++ struct hv_memory_hint *hint; ++ int i; ++ u64 status; ++ struct scatterlist *sg; ++ ++ WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES); ++ WARN_ON_ONCE(sgl->length < HV_MIN_PAGE_REPORTING_LEN); ++ local_irq_save(flags); ++ hint = *(struct hv_memory_hint **)this_cpu_ptr(hyperv_pcpu_input_arg); ++ if (!hint) { ++ local_irq_restore(flags); ++ return -ENOSPC; ++ } ++ ++ hint->type = HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD; ++ hint->reserved = 0; ++ for_each_sg(sgl, sg, nents, i) { ++ union hv_gpa_page_range *range; ++ ++ range = &hint->ranges[i]; ++ range->address_space = 0; ++ /* page reporting only reports 2MB pages or higher */ ++ range->page.largepage = 1; ++ range->page.additional_pages = ++ (sg->length / HV_MIN_PAGE_REPORTING_LEN) - 1; ++ range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB; ++ range->base_large_pfn = ++ page_to_hvpfn(sg_page(sg)) >> HV_MIN_PAGE_REPORTING_ORDER; ++ } ++ ++ status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0, ++ hint, NULL); ++ local_irq_restore(flags); ++ if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { ++ pr_err("Cold memory discard hypercall failed with status %llx\n", ++ status); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static void enable_page_reporting(void) ++{ ++ int ret; ++ ++ /* Essentially, validating 'PAGE_REPORTING_MIN_ORDER' is big enough. 
*/ ++ if (pageblock_order < HV_MIN_PAGE_REPORTING_ORDER) { ++ pr_debug("Cold memory discard is only supported on 2MB pages and above\n"); ++ return; ++ } ++ ++ if (!hv_query_ext_cap(HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT)) { ++ pr_debug("Cold memory discard hint not supported by Hyper-V\n"); ++ return; ++ } ++ ++ BUILD_BUG_ON(PAGE_REPORTING_CAPACITY > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES); ++ dm_device.pr_dev_info.report = hv_free_page_report; ++ ret = page_reporting_register(&dm_device.pr_dev_info); ++ if (ret < 0) { ++ dm_device.pr_dev_info.report = NULL; ++ pr_err("Failed to enable cold memory discard: %d\n", ret); ++ } else { ++ pr_info("Cold memory discard hint enabled\n"); ++ } ++} ++ ++static void disable_page_reporting(void) ++{ ++ if (dm_device.pr_dev_info.report) { ++ page_reporting_unregister(&dm_device.pr_dev_info); ++ dm_device.pr_dev_info.report = NULL; ++ } ++} ++ + static int balloon_connect_vsp(struct hv_device *dev) + { + struct dm_version_request version_req; +@@ -1710,6 +1796,7 @@ static int balloon_probe(struct hv_device *dev, + if (ret != 0) + return ret; + ++ enable_page_reporting(); + dm_device.state = DM_INITIALIZED; + + dm_device.thread = +@@ -1724,6 +1811,7 @@ static int balloon_probe(struct hv_device *dev, + probe_error: + dm_device.state = DM_INIT_ERROR; + dm_device.thread = NULL; ++ disable_page_reporting(); + vmbus_close(dev->channel); + #ifdef CONFIG_MEMORY_HOTPLUG + unregister_memory_notifier(&hv_memory_nb); +@@ -1746,6 +1834,7 @@ static int balloon_remove(struct hv_device *dev) + cancel_work_sync(&dm->ha_wrk.wrk); + + kthread_stop(dm->thread); ++ disable_page_reporting(); + vmbus_close(dev->channel); + #ifdef CONFIG_MEMORY_HOTPLUG + unregister_memory_notifier(&hv_memory_nb); +diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h +index 9cf10837d005..515c3fb06ab3 100644 +--- a/include/asm-generic/hyperv-tlfs.h ++++ b/include/asm-generic/hyperv-tlfs.h +@@ -89,9 +89,9 @@ + #define HV_ACCESS_STATS BIT(8) 
+ #define HV_DEBUGGING BIT(11) + #define HV_CPU_MANAGEMENT BIT(12) ++#define HV_ENABLE_EXTENDED_HYPERCALLS BIT(20) + #define HV_ISOLATION BIT(22) + +- + /* + * TSC page layout. + */ +@@ -159,11 +159,18 @@ struct ms_hyperv_tsc_page { + #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af + #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 + ++/* Extended hypercalls */ ++#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001 ++#define HV_EXT_CALL_MEMORY_HEAT_HINT 0x8003 ++ + #define HV_FLUSH_ALL_PROCESSORS BIT(0) + #define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) + #define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) + #define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) + ++/* Extended capability bits */ ++#define HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT BIT(8) ++ + enum HV_GENERIC_SET_FORMAT { + HV_GENERIC_SET_SPARSE_4K, + HV_GENERIC_SET_ALL, +@@ -408,8 +415,10 @@ struct hv_guest_mapping_flush { + * by the bitwidth of "additional_pages" in union hv_gpa_page_range. + */ + #define HV_MAX_FLUSH_PAGES (2048) ++#define HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB 0 ++#define HV_GPA_PAGE_RANGE_PAGE_SIZE_1GB 1 + +-/* HvFlushGuestPhysicalAddressList hypercall */ ++/* HvFlushGuestPhysicalAddressList, HvExtCallMemoryHeatHint hypercall */ + union hv_gpa_page_range { + u64 address_space; + struct { +@@ -417,6 +426,12 @@ union hv_gpa_page_range { + u64 largepage:1; + u64 basepfn:52; + } page; ++ struct { ++ u64 reserved:12; ++ u64 page_size:1; ++ u64 reserved1:8; ++ u64 base_large_pfn:43; ++ }; + }; + + /* +@@ -774,4 +789,20 @@ struct hv_input_unmap_device_interrupt { + #define HV_SOURCE_SHADOW_NONE 0x0 + #define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1 + ++/* ++ * The whole argument should fit in a page to be able to pass to the hypervisor ++ * in one hypercall. 
++ */ ++#define HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES \ ++ ((HV_HYP_PAGE_SIZE - sizeof(struct hv_memory_hint)) / \ ++ sizeof(union hv_gpa_page_range)) ++ ++/* HvExtCallMemoryHeatHint hypercall */ ++#define HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD 2 ++struct hv_memory_hint { ++ u64 type:2; ++ u64 reserved:62; ++ union hv_gpa_page_range ranges[]; ++} __packed; ++ + #endif +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +index bbc011390cbb..c749d1c4f682 100644 +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -27,7 +27,7 @@ + + struct ms_hyperv_info { + u32 features; +- u32 features_b; ++ u32 priv_high; + u32 misc_features; + u32 hints; + u32 nested_features; +@@ -178,6 +178,7 @@ bool hv_is_hibernation_supported(void); + enum hv_isolation_type hv_get_isolation_type(void); + bool hv_is_isolation_supported(void); + void hyperv_cleanup(void); ++bool hv_query_ext_cap(u64 cap_query); + #else /* CONFIG_HYPERV */ + static inline bool hv_is_hyperv_initialized(void) { return false; } + static inline bool hv_is_hibernation_supported(void) { return false; } +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Joseph Salisbury +Date: Fri, 16 Apr 2021 17:43:02 -0700 +Subject: [PATCH 23/53] x86/hyperv: Move hv_do_rep_hypercall to asm-generic + +This patch makes no functional changes. It simply moves hv_do_rep_hypercall() +out of arch/x86/include/asm/mshyperv.h and into asm-generic/mshyperv.h + +hv_do_rep_hypercall() is architecture independent, so it makes sense that it +should be in the architecture independent mshyperv.h, not in the x86-specific +mshyperv.h. + +This is done in preperation for a follow up patch which creates a consistent +pattern for checking Hyper-V hypercall status. 
+ +Signed-off-by: Joseph Salisbury +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/1618620183-9967-1-git-send-email-joseph.salisbury@linux.microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 6523592cee4650c6aa997d69cd0045a01e07a1ef) +--- + arch/x86/include/asm/mshyperv.h | 32 -------------------------------- + include/asm-generic/mshyperv.h | 31 +++++++++++++++++++++++++++++++ + 2 files changed, 31 insertions(+), 32 deletions(-) + +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index 212d34f80bb4..2cf1afb55fea 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -178,38 +178,6 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) + return hv_status; + } + +-/* +- * Rep hypercalls. Callers of this functions are supposed to ensure that +- * rep_count and varhead_size comply with Hyper-V hypercall definition. +- */ +-static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, +- void *input, void *output) +-{ +- u64 control = code; +- u64 status; +- u16 rep_comp; +- +- control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET; +- control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET; +- +- do { +- status = hv_do_hypercall(control, input, output); +- if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) +- return status; +- +- /* Bits 32-43 of status have 'Reps completed' data. 
*/ +- rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >> +- HV_HYPERCALL_REP_COMP_OFFSET; +- +- control &= ~HV_HYPERCALL_REP_START_MASK; +- control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET; +- +- touch_nmi_watchdog(); +- } while (rep_comp < rep_count); +- +- return status; +-} +- + extern struct hv_vp_assist_page **hv_vp_assist_page; + + static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +index c749d1c4f682..2f01140bbf66 100644 +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -41,6 +41,37 @@ extern struct ms_hyperv_info ms_hyperv; + extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); + extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); + ++/* ++ * Rep hypercalls. Callers of this functions are supposed to ensure that ++ * rep_count and varhead_size comply with Hyper-V hypercall definition. ++ */ ++static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, ++ void *input, void *output) ++{ ++ u64 control = code; ++ u64 status; ++ u16 rep_comp; ++ ++ control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET; ++ control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET; ++ ++ do { ++ status = hv_do_hypercall(control, input, output); ++ if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) ++ return status; ++ ++ /* Bits 32-43 of status have 'Reps completed' data. 
*/ ++ rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >> ++ HV_HYPERCALL_REP_COMP_OFFSET; ++ ++ control &= ~HV_HYPERCALL_REP_START_MASK; ++ control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET; ++ ++ touch_nmi_watchdog(); ++ } while (rep_comp < rep_count); ++ ++ return status; ++} + + /* Generate the guest OS identifier as described in the Hyper-V TLFS */ + static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Joseph Salisbury +Date: Fri, 16 Apr 2021 17:43:03 -0700 +Subject: [PATCH 24/53] drivers: hv: Create a consistent pattern for checking + Hyper-V hypercall status + +There is not a consistent pattern for checking Hyper-V hypercall status. +Existing code uses a number of variants. The variants work, but a consistent +pattern would improve the readability of the code, and be more conformant +to what the Hyper-V TLFS says about hypercall status. + +Implemented new helper functions hv_result(), hv_result_success(), and +hv_repcomp(). Changed the places where hv_do_hypercall() and related variants +are used to use the helper functions. 
+ +Signed-off-by: Joseph Salisbury +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/1618620183-9967-2-git-send-email-joseph.salisbury@linux.microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 753ed9c95c37d058e50e7d42bbe296ee0bf6670d) +--- + arch/x86/hyperv/hv_apic.c | 16 +++++++++------- + arch/x86/hyperv/hv_init.c | 2 +- + arch/x86/hyperv/hv_proc.c | 25 ++++++++++--------------- + arch/x86/hyperv/irqdomain.c | 6 +++--- + arch/x86/hyperv/mmu.c | 8 ++++---- + arch/x86/hyperv/nested.c | 8 ++++---- + arch/x86/include/asm/mshyperv.h | 1 + + drivers/hv/hv.c | 2 +- + drivers/pci/controller/pci-hyperv.c | 2 +- + include/asm-generic/mshyperv.h | 25 ++++++++++++++++++++----- + 10 files changed, 54 insertions(+), 41 deletions(-) + +diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c +index 284e73661a18..ca581b24974a 100644 +--- a/arch/x86/hyperv/hv_apic.c ++++ b/arch/x86/hyperv/hv_apic.c +@@ -103,7 +103,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) + struct hv_send_ipi_ex *ipi_arg; + unsigned long flags; + int nr_bank = 0; +- int ret = 1; ++ u64 status = HV_STATUS_INVALID_PARAMETER; + + if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) + return false; +@@ -128,19 +128,19 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) + if (!nr_bank) + ipi_arg->vp_set.format = HV_GENERIC_SET_ALL; + +- ret = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, ++ status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, + ipi_arg, NULL); + + ipi_mask_ex_done: + local_irq_restore(flags); +- return ((ret == 0) ? 
true : false); ++ return hv_result_success(status); + } + + static bool __send_ipi_mask(const struct cpumask *mask, int vector) + { + int cur_cpu, vcpu; + struct hv_send_ipi ipi_arg; +- int ret = 1; ++ u64 status; + + trace_hyperv_send_ipi_mask(mask, vector); + +@@ -184,9 +184,9 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) + __set_bit(vcpu, (unsigned long *)&ipi_arg.cpu_mask); + } + +- ret = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector, ++ status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector, + ipi_arg.cpu_mask); +- return ((ret == 0) ? true : false); ++ return hv_result_success(status); + + do_ex_hypercall: + return __send_ipi_mask_ex(mask, vector); +@@ -195,6 +195,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) + static bool __send_ipi_one(int cpu, int vector) + { + int vp = hv_cpu_number_to_vp_number(cpu); ++ u64 status; + + trace_hyperv_send_ipi_one(cpu, vector); + +@@ -207,7 +208,8 @@ static bool __send_ipi_one(int cpu, int vector) + if (vp >= 64) + return __send_ipi_mask_ex(cpumask_of(cpu), vector); + +- return !hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp)); ++ status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp)); ++ return hv_result_success(status); + } + + static void hv_send_ipi(int cpu, int vector) +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index ea81e5608e55..1c194e287866 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -347,7 +347,7 @@ static void __init hv_get_partition_id(void) + local_irq_save(flags); + output_page = *this_cpu_ptr(hyperv_pcpu_output_arg); + status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page); +- if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { ++ if (!hv_result_success(status)) { + /* No point in proceeding if this failed */ + pr_err("Failed to get partition ID: %lld\n", status); + BUG(); +diff --git a/arch/x86/hyperv/hv_proc.c 
b/arch/x86/hyperv/hv_proc.c +index 27e17ad3ba49..68a0843d4750 100644 +--- a/arch/x86/hyperv/hv_proc.c ++++ b/arch/x86/hyperv/hv_proc.c +@@ -92,10 +92,9 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) + status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY, + page_count, 0, input_page, NULL); + local_irq_restore(flags); +- +- if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { ++ if (!hv_result_success(status)) { + pr_err("Failed to deposit pages: %lld\n", status); +- ret = status; ++ ret = hv_result(status); + goto err_free_allocations; + } + +@@ -121,7 +120,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) + struct hv_add_logical_processor_out *output; + u64 status; + unsigned long flags; +- int ret = 0; ++ int ret = HV_STATUS_SUCCESS; + int pxm = node_to_pxm(node); + + /* +@@ -147,13 +146,11 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) + input, output); + local_irq_restore(flags); + +- status &= HV_HYPERCALL_RESULT_MASK; +- +- if (status != HV_STATUS_INSUFFICIENT_MEMORY) { +- if (status != HV_STATUS_SUCCESS) { ++ if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { ++ if (!hv_result_success(status)) { + pr_err("%s: cpu %u apic ID %u, %lld\n", __func__, + lp_index, apic_id, status); +- ret = status; ++ ret = hv_result(status); + } + break; + } +@@ -168,7 +165,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) + struct hv_create_vp *input; + u64 status; + unsigned long irq_flags; +- int ret = 0; ++ int ret = HV_STATUS_SUCCESS; + int pxm = node_to_pxm(node); + + /* Root VPs don't seem to need pages deposited */ +@@ -199,13 +196,11 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) + status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); + local_irq_restore(irq_flags); + +- status &= HV_HYPERCALL_RESULT_MASK; +- +- if (status != HV_STATUS_INSUFFICIENT_MEMORY) { +- if (status != HV_STATUS_SUCCESS) { ++ if (hv_result(status) != 
HV_STATUS_INSUFFICIENT_MEMORY) { ++ if (!hv_result_success(status)) { + pr_err("%s: vcpu %u, lp %u, %lld\n", __func__, + vp_index, flags, status); +- ret = status; ++ ret = hv_result(status); + } + break; + } +diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c +index 4421a8d92e23..514fc64e23d5 100644 +--- a/arch/x86/hyperv/irqdomain.c ++++ b/arch/x86/hyperv/irqdomain.c +@@ -63,10 +63,10 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, + + local_irq_restore(flags); + +- if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) ++ if (!hv_result_success(status)) + pr_err("%s: hypercall failed, status %lld\n", __func__, status); + +- return status & HV_HYPERCALL_RESULT_MASK; ++ return hv_result(status); + } + + static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) +@@ -88,7 +88,7 @@ static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) + status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL); + local_irq_restore(flags); + +- return status & HV_HYPERCALL_RESULT_MASK; ++ return hv_result(status); + } + + #ifdef CONFIG_PCI_MSI +diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c +index 2c87350c1fb0..c0ba8874d9cb 100644 +--- a/arch/x86/hyperv/mmu.c ++++ b/arch/x86/hyperv/mmu.c +@@ -58,7 +58,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, + int cpu, vcpu, gva_n, max_gvas; + struct hv_tlb_flush **flush_pcpu; + struct hv_tlb_flush *flush; +- u64 status = U64_MAX; ++ u64 status; + unsigned long flags; + + trace_hyperv_mmu_flush_tlb_others(cpus, info); +@@ -161,7 +161,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, + check_status: + local_irq_restore(flags); + +- if (!(status & HV_HYPERCALL_RESULT_MASK)) ++ if (hv_result_success(status)) + return; + do_native: + native_flush_tlb_others(cpus, info); +@@ -176,7 +176,7 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, + u64 status; + + if (!(ms_hyperv.hints & 
HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) +- return U64_MAX; ++ return HV_STATUS_INVALID_PARAMETER; + + flush_pcpu = (struct hv_tlb_flush_ex **) + this_cpu_ptr(hyperv_pcpu_input_arg); +@@ -201,7 +201,7 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, + flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus); + if (nr_bank < 0) +- return U64_MAX; ++ return HV_STATUS_INVALID_PARAMETER; + + /* + * We can flush not more than max_gvas with one hypercall. Flush the +diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c +index dd0a843f766d..5d70968c8538 100644 +--- a/arch/x86/hyperv/nested.c ++++ b/arch/x86/hyperv/nested.c +@@ -47,7 +47,7 @@ int hyperv_flush_guest_mapping(u64 as) + flush, NULL); + local_irq_restore(flags); + +- if (!(status & HV_HYPERCALL_RESULT_MASK)) ++ if (hv_result_success(status)) + ret = 0; + + fault: +@@ -92,7 +92,7 @@ int hyperv_flush_guest_mapping_range(u64 as, + { + struct hv_guest_mapping_flush_list **flush_pcpu; + struct hv_guest_mapping_flush_list *flush; +- u64 status = 0; ++ u64 status; + unsigned long flags; + int ret = -ENOTSUPP; + int gpa_n = 0; +@@ -125,10 +125,10 @@ int hyperv_flush_guest_mapping_range(u64 as, + + local_irq_restore(flags); + +- if (!(status & HV_HYPERCALL_RESULT_MASK)) ++ if (hv_result_success(status)) + ret = 0; + else +- ret = status; ++ ret = hv_result(status); + fault: + trace_hyperv_nested_flush_guest_mapping_range(as, ret); + return ret; +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index 2cf1afb55fea..a34f31dd5d93 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + + typedef int (*hyperv_fill_flush_list_func)( + struct hv_guest_mapping_flush_list *flush, +diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c +index cca8d5ea61f0..3b1ba6c607a1 100644 +--- a/drivers/hv/hv.c ++++ b/drivers/hv/hv.c +@@ -104,7 +104,7 
@@ int hv_post_message(union hv_connection_id connection_id, + */ + put_cpu_ptr(hv_cpu); + +- return status & 0xFFFF; ++ return hv_result(status); + } + + int hv_synic_alloc(void) +diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c +index 7fd8cd554675..98aa92742198 100644 +--- a/drivers/pci/controller/pci-hyperv.c ++++ b/drivers/pci/controller/pci-hyperv.c +@@ -1291,7 +1291,7 @@ static void hv_irq_unmask(struct irq_data *data) + * resumes, hv_pci_restore_msi_state() is able to correctly restore + * the interrupt with the correct affinity. + */ +- if (res && hbus->state != hv_pcibus_removing) ++ if (!hv_result_success(res) && hbus->state != hv_pcibus_removing) + dev_err(&hbus->hdev->device, + "%s() failed: %#llx", __func__, res); + +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +index 2f01140bbf66..7a2492fb0ee1 100644 +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -41,6 +41,24 @@ extern struct ms_hyperv_info ms_hyperv; + extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); + extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); + ++/* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */ ++static inline int hv_result(u64 status) ++{ ++ return status & HV_HYPERCALL_RESULT_MASK; ++} ++ ++static inline bool hv_result_success(u64 status) ++{ ++ return hv_result(status) == HV_STATUS_SUCCESS; ++} ++ ++static inline unsigned int hv_repcomp(u64 status) ++{ ++ /* Bits [43:32] of status have 'Reps completed' data. */ ++ return (status & HV_HYPERCALL_REP_COMP_MASK) >> ++ HV_HYPERCALL_REP_COMP_OFFSET; ++} ++ + /* + * Rep hypercalls. Callers of this functions are supposed to ensure that + * rep_count and varhead_size comply with Hyper-V hypercall definition. 
+@@ -57,12 +75,10 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, + + do { + status = hv_do_hypercall(control, input, output); +- if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) ++ if (!hv_result_success(status)) + return status; + +- /* Bits 32-43 of status have 'Reps completed' data. */ +- rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >> +- HV_HYPERCALL_REP_COMP_OFFSET; ++ rep_comp = hv_repcomp(status); + + control &= ~HV_HYPERCALL_REP_START_MASK; + control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET; +@@ -87,7 +103,6 @@ static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, + return guest_id; + } + +- + /* Free the message slot and signal end-of-message if required */ + static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) + { +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Long Li +Date: Wed, 12 May 2021 01:06:49 -0700 +Subject: [PATCH 25/53] PCI: hv: Remove bus device removal unused + refcount/functions + +With the new method of flushing/stopping the workqueue before doing bus +removal, the old mechanism of using refcount and wait for completion +is no longer needed. Remove those dead code. 
+ +Link: https://lore.kernel.org/r/1620806809-31055-1-git-send-email-longli@linuxonhyperv.com +Signed-off-by: Long Li +[lorenzo.pieralisi@arm.com: Reworded subject] +Signed-off-by: Lorenzo Pieralisi +Reviewed-by: Michael Kelley +(cherry picked from commit 326dc2e1e59a98c61c3c71616496422af522678c) +--- + drivers/pci/controller/pci-hyperv.c | 34 +++-------------------------- + 1 file changed, 3 insertions(+), 31 deletions(-) + +diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c +index 98aa92742198..64df545b6dcf 100644 +--- a/drivers/pci/controller/pci-hyperv.c ++++ b/drivers/pci/controller/pci-hyperv.c +@@ -452,7 +452,6 @@ struct hv_pcibus_device { + /* Protocol version negotiated with the host */ + enum pci_protocol_version_t protocol_version; + enum hv_pcibus_state state; +- refcount_t remove_lock; + struct hv_device *hdev; + resource_size_t low_mmio_space; + resource_size_t high_mmio_space; +@@ -460,7 +459,6 @@ struct hv_pcibus_device { + struct resource *low_mmio_res; + struct resource *high_mmio_res; + struct completion *survey_event; +- struct completion remove_event; + struct pci_bus *pci_bus; + spinlock_t config_lock; /* Avoid two threads writing index page */ + spinlock_t device_list_lock; /* Protect lists below */ +@@ -593,9 +591,6 @@ static void put_pcichild(struct hv_pci_dev *hpdev) + kfree(hpdev); + } + +-static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus); +-static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus); +- + /* + * There is no good way to get notified from vmbus_onoffer_rescind(), + * so let's use polling here, since this is not a hot path. +@@ -2067,10 +2062,8 @@ static void pci_devices_present_work(struct work_struct *work) + } + spin_unlock_irqrestore(&hbus->device_list_lock, flags); + +- if (!dr) { +- put_hvpcibus(hbus); ++ if (!dr) + return; +- } + + /* First, mark all existing children as reported missing. 
*/ + spin_lock_irqsave(&hbus->device_list_lock, flags); +@@ -2153,7 +2146,6 @@ static void pci_devices_present_work(struct work_struct *work) + break; + } + +- put_hvpcibus(hbus); + kfree(dr); + } + +@@ -2194,12 +2186,10 @@ static int hv_pci_start_relations_work(struct hv_pcibus_device *hbus, + list_add_tail(&dr->list_entry, &hbus->dr_list); + spin_unlock_irqrestore(&hbus->device_list_lock, flags); + +- if (pending_dr) { ++ if (pending_dr) + kfree(dr_wrk); +- } else { +- get_hvpcibus(hbus); ++ else + queue_work(hbus->wq, &dr_wrk->wrk); +- } + + return 0; + } +@@ -2342,8 +2332,6 @@ static void hv_eject_device_work(struct work_struct *work) + put_pcichild(hpdev); + put_pcichild(hpdev); + /* hpdev has been freed. Do not use it any more. */ +- +- put_hvpcibus(hbus); + } + + /** +@@ -2367,7 +2355,6 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev) + hpdev->state = hv_pcichild_ejecting; + get_pcichild(hpdev); + INIT_WORK(&hpdev->wrk, hv_eject_device_work); +- get_hvpcibus(hbus); + queue_work(hbus->wq, &hpdev->wrk); + } + +@@ -2967,17 +2954,6 @@ static int hv_send_resources_released(struct hv_device *hdev) + return 0; + } + +-static void get_hvpcibus(struct hv_pcibus_device *hbus) +-{ +- refcount_inc(&hbus->remove_lock); +-} +- +-static void put_hvpcibus(struct hv_pcibus_device *hbus) +-{ +- if (refcount_dec_and_test(&hbus->remove_lock)) +- complete(&hbus->remove_event); +-} +- + #define HVPCI_DOM_MAP_SIZE (64 * 1024) + static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE); + +@@ -3097,14 +3073,12 @@ static int hv_pci_probe(struct hv_device *hdev, + hbus->sysdata.domain = dom; + + hbus->hdev = hdev; +- refcount_set(&hbus->remove_lock, 1); + INIT_LIST_HEAD(&hbus->children); + INIT_LIST_HEAD(&hbus->dr_list); + INIT_LIST_HEAD(&hbus->resources_for_children); + spin_lock_init(&hbus->config_lock); + spin_lock_init(&hbus->device_list_lock); + spin_lock_init(&hbus->retarget_msi_interrupt_lock); +- init_completion(&hbus->remove_event); + hbus->wq = 
alloc_ordered_workqueue("hv_pci_%x", 0, + hbus->sysdata.domain); + if (!hbus->wq) { +@@ -3348,8 +3322,6 @@ static int hv_pci_remove(struct hv_device *hdev) + hv_pci_free_bridge_windows(hbus); + irq_domain_remove(hbus->irq_domain); + irq_domain_free_fwnode(hbus->sysdata.fwnode); +- put_hvpcibus(hbus); +- wait_for_completion(&hbus->remove_event); + + hv_put_dom_num(hbus->sysdata.domain); + +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Sudeep Holla +Date: Tue, 18 May 2021 17:36:18 +0100 +Subject: [PATCH 26/53] arm64: smccc: Add support for SMCCCv1.2 extended + input/output registers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +SMCCC v1.2 allows x8-x17 to be used as parameter registers and x4—x17 +to be used as result registers in SMC64/HVC64. Arm Firmware Framework +for Armv8-A specification makes use of x0-x7 as parameter and result +registers. There are other users like Hyper-V who intend to use beyond +x0-x7 as well. + +Current SMCCC interface in the kernel just use x0-x7 as parameter and +x0-x3 as result registers as required by SMCCCv1.0. Let us add new +interface to support this extended set of input/output registers namely +x0-x17 as both parameter and result registers. 
+ +Acked-by: Mark Rutland +Tested-by: Michael Kelley +Reviewed-by: Michael Kelley +Cc: Will Deacon +Cc: Catalin Marinas +Signed-off-by: Sudeep Holla +Reviewed-by: Mark Brown +Link: https://lore.kernel.org/r/20210518163618.43950-1-sudeep.holla@arm.com +Signed-off-by: Will Deacon +(cherry picked from commit 3fdc0cb59d97f87e2cc708d424f1538e31744286) +--- + arch/arm64/kernel/asm-offsets.c | 9 ++++++ + arch/arm64/kernel/smccc-call.S | 57 +++++++++++++++++++++++++++++++++ + include/linux/arm-smccc.h | 55 +++++++++++++++++++++++++++++++ + 3 files changed, 121 insertions(+) + +diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c +index 7d32fc959b1a..652b9e974fdc 100644 +--- a/arch/arm64/kernel/asm-offsets.c ++++ b/arch/arm64/kernel/asm-offsets.c +@@ -122,6 +122,15 @@ int main(void) + DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); + DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id)); + DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state)); ++ DEFINE(ARM_SMCCC_1_2_REGS_X0_OFFS, offsetof(struct arm_smccc_1_2_regs, a0)); ++ DEFINE(ARM_SMCCC_1_2_REGS_X2_OFFS, offsetof(struct arm_smccc_1_2_regs, a2)); ++ DEFINE(ARM_SMCCC_1_2_REGS_X4_OFFS, offsetof(struct arm_smccc_1_2_regs, a4)); ++ DEFINE(ARM_SMCCC_1_2_REGS_X6_OFFS, offsetof(struct arm_smccc_1_2_regs, a6)); ++ DEFINE(ARM_SMCCC_1_2_REGS_X8_OFFS, offsetof(struct arm_smccc_1_2_regs, a8)); ++ DEFINE(ARM_SMCCC_1_2_REGS_X10_OFFS, offsetof(struct arm_smccc_1_2_regs, a10)); ++ DEFINE(ARM_SMCCC_1_2_REGS_X12_OFFS, offsetof(struct arm_smccc_1_2_regs, a12)); ++ DEFINE(ARM_SMCCC_1_2_REGS_X14_OFFS, offsetof(struct arm_smccc_1_2_regs, a14)); ++ DEFINE(ARM_SMCCC_1_2_REGS_X16_OFFS, offsetof(struct arm_smccc_1_2_regs, a16)); + BLANK(); + DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); + DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); +diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S +index 
d62447964ed9..2def9d0dd3dd 100644 +--- a/arch/arm64/kernel/smccc-call.S ++++ b/arch/arm64/kernel/smccc-call.S +@@ -43,3 +43,60 @@ SYM_FUNC_START(__arm_smccc_hvc) + SMCCC hvc + SYM_FUNC_END(__arm_smccc_hvc) + EXPORT_SYMBOL(__arm_smccc_hvc) ++ ++ .macro SMCCC_1_2 instr ++ /* Save `res` and free a GPR that won't be clobbered */ ++ stp x1, x19, [sp, #-16]! ++ ++ /* Ensure `args` won't be clobbered while loading regs in next step */ ++ mov x19, x0 ++ ++ /* Load the registers x0 - x17 from the struct arm_smccc_1_2_regs */ ++ ldp x0, x1, [x19, #ARM_SMCCC_1_2_REGS_X0_OFFS] ++ ldp x2, x3, [x19, #ARM_SMCCC_1_2_REGS_X2_OFFS] ++ ldp x4, x5, [x19, #ARM_SMCCC_1_2_REGS_X4_OFFS] ++ ldp x6, x7, [x19, #ARM_SMCCC_1_2_REGS_X6_OFFS] ++ ldp x8, x9, [x19, #ARM_SMCCC_1_2_REGS_X8_OFFS] ++ ldp x10, x11, [x19, #ARM_SMCCC_1_2_REGS_X10_OFFS] ++ ldp x12, x13, [x19, #ARM_SMCCC_1_2_REGS_X12_OFFS] ++ ldp x14, x15, [x19, #ARM_SMCCC_1_2_REGS_X14_OFFS] ++ ldp x16, x17, [x19, #ARM_SMCCC_1_2_REGS_X16_OFFS] ++ ++ \instr #0 ++ ++ /* Load the `res` from the stack */ ++ ldr x19, [sp] ++ ++ /* Store the registers x0 - x17 into the result structure */ ++ stp x0, x1, [x19, #ARM_SMCCC_1_2_REGS_X0_OFFS] ++ stp x2, x3, [x19, #ARM_SMCCC_1_2_REGS_X2_OFFS] ++ stp x4, x5, [x19, #ARM_SMCCC_1_2_REGS_X4_OFFS] ++ stp x6, x7, [x19, #ARM_SMCCC_1_2_REGS_X6_OFFS] ++ stp x8, x9, [x19, #ARM_SMCCC_1_2_REGS_X8_OFFS] ++ stp x10, x11, [x19, #ARM_SMCCC_1_2_REGS_X10_OFFS] ++ stp x12, x13, [x19, #ARM_SMCCC_1_2_REGS_X12_OFFS] ++ stp x14, x15, [x19, #ARM_SMCCC_1_2_REGS_X14_OFFS] ++ stp x16, x17, [x19, #ARM_SMCCC_1_2_REGS_X16_OFFS] ++ ++ /* Restore original x19 */ ++ ldp xzr, x19, [sp], #16 ++ ret ++.endm ++ ++/* ++ * void arm_smccc_1_2_hvc(const struct arm_smccc_1_2_regs *args, ++ * struct arm_smccc_1_2_regs *res); ++ */ ++SYM_FUNC_START(arm_smccc_1_2_hvc) ++ SMCCC_1_2 hvc ++SYM_FUNC_END(arm_smccc_1_2_hvc) ++EXPORT_SYMBOL(arm_smccc_1_2_hvc) ++ ++/* ++ * void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs *args, ++ * struct 
arm_smccc_1_2_regs *res); ++ */ ++SYM_FUNC_START(arm_smccc_1_2_smc) ++ SMCCC_1_2 smc ++SYM_FUNC_END(arm_smccc_1_2_smc) ++EXPORT_SYMBOL(arm_smccc_1_2_smc) +diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h +index f860645f6512..ee2d336ff1c1 100644 +--- a/include/linux/arm-smccc.h ++++ b/include/linux/arm-smccc.h +@@ -155,6 +155,61 @@ struct arm_smccc_res { + unsigned long a3; + }; + ++#ifdef CONFIG_ARM64 ++/** ++ * struct arm_smccc_1_2_regs - Arguments for or Results from SMC/HVC call ++ * @a0-a17 argument values from registers 0 to 17 ++ */ ++struct arm_smccc_1_2_regs { ++ unsigned long a0; ++ unsigned long a1; ++ unsigned long a2; ++ unsigned long a3; ++ unsigned long a4; ++ unsigned long a5; ++ unsigned long a6; ++ unsigned long a7; ++ unsigned long a8; ++ unsigned long a9; ++ unsigned long a10; ++ unsigned long a11; ++ unsigned long a12; ++ unsigned long a13; ++ unsigned long a14; ++ unsigned long a15; ++ unsigned long a16; ++ unsigned long a17; ++}; ++ ++/** ++ * arm_smccc_1_2_hvc() - make HVC calls ++ * @args: arguments passed via struct arm_smccc_1_2_regs ++ * @res: result values via struct arm_smccc_1_2_regs ++ * ++ * This function is used to make HVC calls following SMC Calling Convention ++ * v1.2 or above. The content of the supplied param are copied from the ++ * structure to registers prior to the HVC instruction. The return values ++ * are updated with the content from registers on return from the HVC ++ * instruction. ++ */ ++asmlinkage void arm_smccc_1_2_hvc(const struct arm_smccc_1_2_regs *args, ++ struct arm_smccc_1_2_regs *res); ++ ++/** ++ * arm_smccc_1_2_smc() - make SMC calls ++ * @args: arguments passed via struct arm_smccc_1_2_regs ++ * @res: result values via struct arm_smccc_1_2_regs ++ * ++ * This function is used to make SMC calls following SMC Calling Convention ++ * v1.2 or above. The content of the supplied param are copied from the ++ * structure to registers prior to the SMC instruction. 
The return values ++ * are updated with the content from registers on return from the SMC ++ * instruction. ++ */ ++asmlinkage void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs *args, ++ struct arm_smccc_1_2_regs *res); ++#endif ++ + /** + * struct arm_smccc_quirk - Contains quirk information + * @id: quirk identification +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 2 Jun 2021 14:36:44 -0700 +Subject: [PATCH 27/53] Drivers: hv: Move Hyper-V extended capability check to + arch neutral code + +The extended capability query code is currently under arch/x86, but it +is architecture neutral, and is used by arch neutral code in the Hyper-V +balloon driver. Hence the balloon driver fails to build on other +architectures. + +Fix by moving the ext cap code out from arch/x86. Because it is also +called from built-in architecture specific code, it can't be in a module, +so the Makefile treats as built-in even when CONFIG_HYPERV is "m". Also +drivers/Makefile is tweaked because this is the first occurrence of a +Hyper-V file that is built-in even when CONFIG_HYPERV is "m". + +While here, update the hypercall status check to use the new helper +function instead of open coding. No functional change. 
+ +Signed-off-by: Michael Kelley +Reviewed-by: Sunil Muthuswamy +Link: https://lore.kernel.org/r/1622669804-2016-1-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit a4d7e8ae4a541557d7a2c815835b786c18c3613c) +--- + arch/x86/hyperv/hv_init.c | 47 ---------------------------- + drivers/Makefile | 2 +- + drivers/hv/Makefile | 3 ++ + drivers/hv/hv_common.c | 66 +++++++++++++++++++++++++++++++++++++++ + 4 files changed, 70 insertions(+), 48 deletions(-) + create mode 100644 drivers/hv/hv_common.c + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index 1c194e287866..f879eb81b06d 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -614,50 +614,3 @@ bool hv_is_isolation_supported(void) + return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; + } + EXPORT_SYMBOL_GPL(hv_is_isolation_supported); +- +-/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */ +-bool hv_query_ext_cap(u64 cap_query) +-{ +- /* +- * The address of the 'hv_extended_cap' variable will be used as an +- * output parameter to the hypercall below and so it should be +- * compatible with 'virt_to_phys'. Which means, it's address should be +- * directly mapped. Use 'static' to keep it compatible; stack variables +- * can be virtually mapped, making them imcompatible with +- * 'virt_to_phys'. +- * Hypercall input/output addresses should also be 8-byte aligned. +- */ +- static u64 hv_extended_cap __aligned(8); +- static bool hv_extended_cap_queried; +- u64 status; +- +- /* +- * Querying extended capabilities is an extended hypercall. Check if the +- * partition supports extended hypercall, first. +- */ +- if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS)) +- return false; +- +- /* Extended capabilities do not change at runtime. 
*/ +- if (hv_extended_cap_queried) +- return hv_extended_cap & cap_query; +- +- status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL, +- &hv_extended_cap); +- +- /* +- * The query extended capabilities hypercall should not fail under +- * any normal circumstances. Avoid repeatedly making the hypercall, on +- * error. +- */ +- hv_extended_cap_queried = true; +- status &= HV_HYPERCALL_RESULT_MASK; +- if (status != HV_STATUS_SUCCESS) { +- pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n", +- status); +- return false; +- } +- +- return hv_extended_cap & cap_query; +-} +-EXPORT_SYMBOL_GPL(hv_query_ext_cap); +diff --git a/drivers/Makefile b/drivers/Makefile +index 576228037718..bd462459aa67 100644 +--- a/drivers/Makefile ++++ b/drivers/Makefile +@@ -160,7 +160,7 @@ obj-$(CONFIG_SOUNDWIRE) += soundwire/ + + # Virtualization drivers + obj-$(CONFIG_VIRT_DRIVERS) += virt/ +-obj-$(CONFIG_HYPERV) += hv/ ++obj-$(subst m,y,$(CONFIG_HYPERV)) += hv/ + + obj-$(CONFIG_PM_DEVFREQ) += devfreq/ + obj-$(CONFIG_EXTCON) += extcon/ +diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile +index 94daf8240c95..d76df5c8c2a9 100644 +--- a/drivers/hv/Makefile ++++ b/drivers/hv/Makefile +@@ -11,3 +11,6 @@ hv_vmbus-y := vmbus_drv.o \ + channel_mgmt.o ring_buffer.o hv_trace.o + hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o + hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_fcopy.o hv_utils_transport.o ++ ++# Code that must be built-in ++obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o +diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c +new file mode 100644 +index 000000000000..f0053c786891 +--- /dev/null ++++ b/drivers/hv/hv_common.c +@@ -0,0 +1,66 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++/* ++ * Architecture neutral utility routines for interacting with ++ * Hyper-V. This file is specifically for code that must be ++ * built-in to the kernel image when CONFIG_HYPERV is set ++ * (vs. 
being in a module) because it is called from architecture ++ * specific code under arch/. ++ * ++ * Copyright (C) 2021, Microsoft, Inc. ++ * ++ * Author : Michael Kelley ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */ ++bool hv_query_ext_cap(u64 cap_query) ++{ ++ /* ++ * The address of the 'hv_extended_cap' variable will be used as an ++ * output parameter to the hypercall below and so it should be ++ * compatible with 'virt_to_phys'. Which means, it's address should be ++ * directly mapped. Use 'static' to keep it compatible; stack variables ++ * can be virtually mapped, making them imcompatible with ++ * 'virt_to_phys'. ++ * Hypercall input/output addresses should also be 8-byte aligned. ++ */ ++ static u64 hv_extended_cap __aligned(8); ++ static bool hv_extended_cap_queried; ++ u64 status; ++ ++ /* ++ * Querying extended capabilities is an extended hypercall. Check if the ++ * partition supports extended hypercall, first. ++ */ ++ if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS)) ++ return false; ++ ++ /* Extended capabilities do not change at runtime. */ ++ if (hv_extended_cap_queried) ++ return hv_extended_cap & cap_query; ++ ++ status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL, ++ &hv_extended_cap); ++ ++ /* ++ * The query extended capabilities hypercall should not fail under ++ * any normal circumstances. Avoid repeatedly making the hypercall, on ++ * error. 
++ */ ++ hv_extended_cap_queried = true; ++ if (!hv_result_success(status)) { ++ pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n", ++ status); ++ return false; ++ } ++ ++ return hv_extended_cap & cap_query; ++} ++EXPORT_SYMBOL_GPL(hv_query_ext_cap); +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Boqun Feng +Date: Tue, 27 Jul 2021 02:06:52 +0800 +Subject: [PATCH 28/53] arm64: PCI: Restructure pcibios_root_bridge_prepare() + +Restructure the pcibios_root_bridge_prepare() as the preparation for +supporting cases when no real ACPI device is related to the PCI host +bridge. + +No functional change. + +Link: https://lore.kernel.org/r/20210726180657.142727-4-boqun.feng@gmail.com +Signed-off-by: Boqun Feng +Signed-off-by: Lorenzo Pieralisi +Acked-by: Catalin Marinas +(cherry picked from commit b424d4d4263200459615c87ad8dddaf4bb571a9d) +--- + arch/arm64/kernel/pci.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c +index 1006ed2d7c60..5148ae242780 100644 +--- a/arch/arm64/kernel/pci.c ++++ b/arch/arm64/kernel/pci.c +@@ -82,14 +82,19 @@ int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) + + int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) + { +- if (!acpi_disabled) { +- struct pci_config_window *cfg = bridge->bus->sysdata; +- struct acpi_device *adev = to_acpi_device(cfg->parent); +- struct device *bus_dev = &bridge->bus->dev; ++ struct pci_config_window *cfg; ++ struct acpi_device *adev; ++ struct device *bus_dev; + +- ACPI_COMPANION_SET(&bridge->dev, adev); +- set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); +- } ++ if (acpi_disabled) ++ return 0; ++ ++ cfg = bridge->bus->sysdata; ++ adev = to_acpi_device(cfg->parent); ++ bus_dev = &bridge->bus->dev; ++ ++ ACPI_COMPANION_SET(&bridge->dev, adev); ++ set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); + + return 0; + } 
+-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Boqun Feng +Date: Tue, 27 Jul 2021 02:06:53 +0800 +Subject: [PATCH 29/53] arm64: PCI: Support root bridge preparation for Hyper-V + +Currently at root bridge preparation, the corresponding ACPI device will +be set as the companion, however for a Hyper-V virtual PCI root bridge, +there is no corresponding ACPI device, because a Hyper-V virtual PCI +root bridge is discovered via VMBus rather than ACPI table. In order to +support this, we need to make pcibios_root_bridge_prepare() work with +cfg->parent being NULL. + +Use a NULL pointer as the ACPI device if there is no corresponding ACPI +device, and this is fine because: 1) ACPI_COMPANION_SET() can work with +the second parameter being NULL, 2) semantically, if a NULL pointer is +set via ACPI_COMPANION_SET(), ACPI_COMPANION() (the read API for this +field) will return NULL, and since ACPI_COMPANION() may return NULL, so +users must have handled the cases where it returns NULL, and 3) since +there is no corresponding ACPI device, it would be wrong to use any +other value here. + +Link: https://lore.kernel.org/r/20210726180657.142727-5-boqun.feng@gmail.com +Signed-off-by: Boqun Feng +Signed-off-by: Lorenzo Pieralisi +Acked-by: Catalin Marinas +(cherry picked from commit 7d40c0f70d92291605c4498b8ee4b3a3c3ba07b1) +--- + arch/arm64/kernel/pci.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c +index 5148ae242780..2276689b5411 100644 +--- a/arch/arm64/kernel/pci.c ++++ b/arch/arm64/kernel/pci.c +@@ -90,7 +90,17 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) + return 0; + + cfg = bridge->bus->sysdata; +- adev = to_acpi_device(cfg->parent); ++ ++ /* ++ * On Hyper-V there is no corresponding ACPI device for a root bridge, ++ * therefore ->parent is set as NULL by the driver. 
And set 'adev' as ++ * NULL in this case because there is no proper ACPI device. ++ */ ++ if (!cfg->parent) ++ adev = NULL; ++ else ++ adev = to_acpi_device(cfg->parent); ++ + bus_dev = &bridge->bus->dev; + + ACPI_COMPANION_SET(&bridge->dev, adev); +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Sun, 11 Jul 2021 19:50:04 -0700 +Subject: [PATCH 30/53] asm-generic/hyperv: Add missing #include of nmi.h + +The recent move of hv_do_rep_hypercall() to this file adds +a reference to touch_nmi_watchdog(). Its function definition +is included indirectly when compiled on x86, but not when +compiled on ARM64. So add the explicit #include. + +No functional change. + +Signed-off-by: Michael Kelley +Link: https://lore.kernel.org/r/1626058204-2106-1-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit ba3f5839fbeb3f9e65070d90aa4e66008bbea80f) +--- + include/asm-generic/mshyperv.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +index 7a2492fb0ee1..2c7500e4810f 100644 +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + #include + +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Boqun Feng +Date: Tue, 27 Jul 2021 02:06:55 +0800 +Subject: [PATCH 31/53] PCI: hv: Set ->domain_nr of pci_host_bridge at probing + time + +No functional change, just store and maintain the PCI domain number in +the ->domain_nr of pci_host_bridge. Note that we still need to keep +the copy of domain number in x86-specific pci_sysdata, because x86 is +not a PCI_DOMAINS_GENERIC=y architecture, so the ->domain_nr of +pci_host_bridge doesn't work for it yet. 
+ +Link: https://lore.kernel.org/r/20210726180657.142727-7-boqun.feng@gmail.com +Signed-off-by: Boqun Feng +Signed-off-by: Lorenzo Pieralisi +(cherry picked from commit 38c0d266dc80b81f7f72314620f01ff6a1e119fe) +--- + drivers/pci/controller/pci-hyperv.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c +index 64df545b6dcf..3cbb04b1e886 100644 +--- a/drivers/pci/controller/pci-hyperv.c ++++ b/drivers/pci/controller/pci-hyperv.c +@@ -2303,7 +2303,7 @@ static void hv_eject_device_work(struct work_struct *work) + * because hbus->pci_bus may not exist yet. + */ + wslot = wslot_to_devfn(hpdev->desc.win_slot.slot); +- pdev = pci_get_domain_bus_and_slot(hbus->sysdata.domain, 0, wslot); ++ pdev = pci_get_domain_bus_and_slot(hbus->bridge->domain_nr, 0, wslot); + if (pdev) { + pci_lock_rescan_remove(); + pci_stop_and_remove_bus_device(pdev); +@@ -3070,6 +3070,7 @@ static int hv_pci_probe(struct hv_device *hdev, + "PCI dom# 0x%hx has collision, using 0x%hx", + dom_req, dom); + ++ hbus->bridge->domain_nr = dom; + hbus->sysdata.domain = dom; + + hbus->hdev = hdev; +@@ -3080,7 +3081,7 @@ static int hv_pci_probe(struct hv_device *hdev, + spin_lock_init(&hbus->device_list_lock); + spin_lock_init(&hbus->retarget_msi_interrupt_lock); + hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0, +- hbus->sysdata.domain); ++ hbus->bridge->domain_nr); + if (!hbus->wq) { + ret = -ENOMEM; + goto free_dom; +@@ -3207,7 +3208,7 @@ static int hv_pci_probe(struct hv_device *hdev, + destroy_wq: + destroy_workqueue(hbus->wq); + free_dom: +- hv_put_dom_num(hbus->sysdata.domain); ++ hv_put_dom_num(hbus->bridge->domain_nr); + free_bus: + kfree(hbus); + return ret; +@@ -3323,7 +3324,7 @@ static int hv_pci_remove(struct hv_device *hdev) + irq_domain_remove(hbus->irq_domain); + irq_domain_free_fwnode(hbus->sysdata.fwnode); + +- hv_put_dom_num(hbus->sysdata.domain); ++ hv_put_dom_num(hbus->bridge->domain_nr); 
+ + kfree(hbus); + return ret; +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Tue, 27 Jul 2021 02:06:54 +0800 +Subject: [PATCH 32/53] PCI: hv: Generify PCI probing + +In order to support ARM64 Hyper-V PCI, we need to set up the bridge at +probing time because ARM64 is a PCI_DOMAINS_GENERIC=y arch and we don't +have pci_config_window (ARM64 sysdata) for a PCI root bus on Hyper-V, so +it's impossible to retrieve the information (e.g. PCI domains, MSI +domains) from bus sysdata on ARM64 after creation. + +Originally in create_root_hv_pci_bus(), pci_create_root_bus() is used to +create the root bus and the corresponding bridge based on x86 sysdata. +Now we create a bridge first and then call pci_scan_root_bus_bridge(), +which allows us to do the necessary set-ups for the bridge. + +Link: https://lore.kernel.org/r/20210726180657.142727-6-boqun.feng@gmail.com +Signed-off-by: Arnd Bergmann +Signed-off-by: Boqun Feng +Signed-off-by: Lorenzo Pieralisi +(cherry picked from commit 418cb6c8e051119125b886c879efdacb04df7165) +--- + drivers/pci/controller/pci-hyperv.c | 57 +++++++++++++++-------------- + 1 file changed, 30 insertions(+), 27 deletions(-) + +diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c +index 3cbb04b1e886..f9dc82258fdc 100644 +--- a/drivers/pci/controller/pci-hyperv.c ++++ b/drivers/pci/controller/pci-hyperv.c +@@ -449,6 +449,7 @@ enum hv_pcibus_state { + + struct hv_pcibus_device { + struct pci_sysdata sysdata; ++ struct pci_host_bridge *bridge; + /* Protocol version negotiated with the host */ + enum pci_protocol_version_t protocol_version; + enum hv_pcibus_state state; +@@ -464,8 +465,6 @@ struct hv_pcibus_device { + spinlock_t device_list_lock; /* Protect lists below */ + void __iomem *cfg_addr; + +- struct list_head resources_for_children; +- + struct list_head children; + struct list_head dr_list; + +@@ -1798,7 +1797,7 @@ static void
hv_pci_assign_slots(struct hv_pcibus_device *hbus) + + slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot)); + snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser); +- hpdev->pci_slot = pci_create_slot(hbus->pci_bus, slot_nr, ++ hpdev->pci_slot = pci_create_slot(hbus->bridge->bus, slot_nr, + name, NULL); + if (IS_ERR(hpdev->pci_slot)) { + pr_warn("pci_create slot %s failed\n", name); +@@ -1828,7 +1827,7 @@ static void hv_pci_remove_slots(struct hv_pcibus_device *hbus) + static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus) + { + struct pci_dev *dev; +- struct pci_bus *bus = hbus->pci_bus; ++ struct pci_bus *bus = hbus->bridge->bus; + struct hv_pci_dev *hv_dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { +@@ -1851,24 +1850,25 @@ static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus) + */ + static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus) + { +- /* Register the device */ +- hbus->pci_bus = pci_create_root_bus(&hbus->hdev->device, +- 0, /* bus number is always zero */ +- &hv_pcifront_ops, +- &hbus->sysdata, +- &hbus->resources_for_children); +- if (!hbus->pci_bus) +- return -ENODEV; ++ int error; ++ struct pci_host_bridge *bridge = hbus->bridge; ++ ++ bridge->dev.parent = &hbus->hdev->device; ++ bridge->sysdata = &hbus->sysdata; ++ bridge->ops = &hv_pcifront_ops; ++ ++ error = pci_scan_root_bus_bridge(bridge); ++ if (error) ++ return error; + + hbus->pci_bus->msi = &hbus->msi_chip; + hbus->pci_bus->msi->dev = &hbus->hdev->device; + + pci_lock_rescan_remove(); +- pci_scan_child_bus(hbus->pci_bus); + hv_pci_assign_numa_node(hbus); +- pci_bus_assign_resources(hbus->pci_bus); ++ pci_bus_assign_resources(bridge->bus); + hv_pci_assign_slots(hbus); +- pci_bus_add_devices(hbus->pci_bus); ++ pci_bus_add_devices(bridge->bus); + pci_unlock_rescan_remove(); + hbus->state = hv_pcibus_installed; + return 0; +@@ -2131,7 +2131,7 @@ static void pci_devices_present_work(struct work_struct *work) + * because there may have 
been changes. + */ + pci_lock_rescan_remove(); +- pci_scan_child_bus(hbus->pci_bus); ++ pci_scan_child_bus(hbus->bridge->bus); + hv_pci_assign_numa_node(hbus); + hv_pci_assign_slots(hbus); + pci_unlock_rescan_remove(); +@@ -2299,8 +2299,8 @@ static void hv_eject_device_work(struct work_struct *work) + /* + * Ejection can come before or after the PCI bus has been set up, so + * attempt to find it and tear down the bus state, if it exists. This +- * must be done without constructs like pci_domain_nr(hbus->pci_bus) +- * because hbus->pci_bus may not exist yet. ++ * must be done without constructs like pci_domain_nr(hbus->bridge->bus) ++ * because hbus->bridge->bus may not exist yet. + */ + wslot = wslot_to_devfn(hpdev->desc.win_slot.slot); + pdev = pci_get_domain_bus_and_slot(hbus->bridge->domain_nr, 0, wslot); +@@ -2666,8 +2666,7 @@ static int hv_pci_allocate_bridge_windows(struct hv_pcibus_device *hbus) + /* Modify this resource to become a bridge window. */ + hbus->low_mmio_res->flags |= IORESOURCE_WINDOW; + hbus->low_mmio_res->flags &= ~IORESOURCE_BUSY; +- pci_add_resource(&hbus->resources_for_children, +- hbus->low_mmio_res); ++ pci_add_resource(&hbus->bridge->windows, hbus->low_mmio_res); + } + + if (hbus->high_mmio_space) { +@@ -2686,8 +2685,7 @@ static int hv_pci_allocate_bridge_windows(struct hv_pcibus_device *hbus) + /* Modify this resource to become a bridge window. 
*/ + hbus->high_mmio_res->flags |= IORESOURCE_WINDOW; + hbus->high_mmio_res->flags &= ~IORESOURCE_BUSY; +- pci_add_resource(&hbus->resources_for_children, +- hbus->high_mmio_res); ++ pci_add_resource(&hbus->bridge->windows, hbus->high_mmio_res); + } + + return 0; +@@ -3006,6 +3004,7 @@ static void hv_put_dom_num(u16 dom) + static int hv_pci_probe(struct hv_device *hdev, + const struct hv_vmbus_device_id *dev_id) + { ++ struct pci_host_bridge *bridge; + struct hv_pcibus_device *hbus; + u16 dom_req, dom; + char *name; +@@ -3018,6 +3017,10 @@ static int hv_pci_probe(struct hv_device *hdev, + */ + BUILD_BUG_ON(sizeof(*hbus) > HV_HYP_PAGE_SIZE); + ++ bridge = devm_pci_alloc_host_bridge(&hdev->device, 0); ++ if (!bridge) ++ return -ENOMEM; ++ + /* + * With the recent 59bb47985c1d ("mm, sl[aou]b: guarantee natural + * alignment for kmalloc(power-of-two)"), kzalloc() is able to allocate +@@ -3039,6 +3042,8 @@ static int hv_pci_probe(struct hv_device *hdev, + hbus = kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); + if (!hbus) + return -ENOMEM; ++ ++ hbus->bridge = bridge; + hbus->state = hv_pcibus_init; + hbus->wslot_res_allocated = -1; + +@@ -3076,7 +3081,6 @@ static int hv_pci_probe(struct hv_device *hdev, + hbus->hdev = hdev; + INIT_LIST_HEAD(&hbus->children); + INIT_LIST_HEAD(&hbus->dr_list); +- INIT_LIST_HEAD(&hbus->resources_for_children); + spin_lock_init(&hbus->config_lock); + spin_lock_init(&hbus->device_list_lock); + spin_lock_init(&hbus->retarget_msi_interrupt_lock); +@@ -3307,9 +3311,9 @@ static int hv_pci_remove(struct hv_device *hdev) + + /* Remove the bus from PCI's point of view. 
*/ + pci_lock_rescan_remove(); +- pci_stop_root_bus(hbus->pci_bus); ++ pci_stop_root_bus(hbus->bridge->bus); + hv_pci_remove_slots(hbus); +- pci_remove_root_bus(hbus->pci_bus); ++ pci_remove_root_bus(hbus->bridge->bus); + pci_unlock_rescan_remove(); + } + +@@ -3319,7 +3323,6 @@ static int hv_pci_remove(struct hv_device *hdev) + + iounmap(hbus->cfg_addr); + hv_free_config_window(hbus); +- pci_free_resource_list(&hbus->resources_for_children); + hv_pci_free_bridge_windows(hbus); + irq_domain_remove(hbus->irq_domain); + irq_domain_free_fwnode(hbus->sysdata.fwnode); +@@ -3402,7 +3405,7 @@ static int hv_pci_restore_msi_msg(struct pci_dev *pdev, void *arg) + */ + static void hv_pci_restore_msi_state(struct hv_pcibus_device *hbus) + { +- pci_walk_bus(hbus->pci_bus, hv_pci_restore_msi_msg, NULL); ++ pci_walk_bus(hbus->bridge->bus, hv_pci_restore_msi_msg, NULL); + } + + static int hv_pci_resume(struct hv_device *hdev) +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Boqun Feng +Date: Tue, 27 Jul 2021 02:06:50 +0800 +Subject: [PATCH 33/53] PCI: Introduce domain_nr in pci_host_bridge + +Currently we retrieve the PCI domain number of the host bridge from the +bus sysdata (or pci_config_window if PCI_DOMAINS_GENERIC=y). Actually +we have the information at PCI host bridge probing time, and it makes +sense that we store it into pci_host_bridge. One benefit of doing so is +the requirement for supporting PCI on Hyper-V for ARM64, because the +host bridge of Hyper-V doesn't have pci_config_window, whereas ARM64 is +a PCI_DOMAINS_GENERIC=y arch, so we cannot retrieve the PCI domain +number from pci_config_window on ARM64 Hyper-V guest. + +As the preparation for ARM64 Hyper-V PCI support, we introduce the +domain_nr in pci_host_bridge and a sentinel value to allow drivers to +set domain numbers properly at probing time. Currently +CONFIG_PCI_DOMAINS_GENERIC=y archs are only users of this +newly-introduced field. 
+ +Link: https://lore.kernel.org/r/20210726180657.142727-2-boqun.feng@gmail.com +Signed-off-by: Boqun Feng +Signed-off-by: Lorenzo Pieralisi +Acked-by: Bjorn Helgaas +(cherry picked from commit 15d82ca23c996d50062286d27ed6a42a8105c04a) +--- + drivers/pci/probe.c | 6 +++++- + include/linux/pci.h | 11 +++++++++++ + 2 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c +index ece90a23936d..7e4cb5f7c9ca 100644 +--- a/drivers/pci/probe.c ++++ b/drivers/pci/probe.c +@@ -594,6 +594,7 @@ static void pci_init_host_bridge(struct pci_host_bridge *bridge) + bridge->native_pme = 1; + bridge->native_ltr = 1; + bridge->native_dpc = 1; ++ bridge->domain_nr = PCI_DOMAIN_NR_NOT_SET; + + device_initialize(&bridge->dev); + } +@@ -898,7 +899,10 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) + bus->ops = bridge->ops; + bus->number = bus->busn_res.start = bridge->busnr; + #ifdef CONFIG_PCI_DOMAINS_GENERIC +- bus->domain_nr = pci_bus_find_domain_nr(bus, parent); ++ if (bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET) ++ bus->domain_nr = pci_bus_find_domain_nr(bus, parent); ++ else ++ bus->domain_nr = bridge->domain_nr; + #endif + + b = pci_find_bus(pci_domain_nr(bus), bridge->busnr); +diff --git a/include/linux/pci.h b/include/linux/pci.h +index a55097b4d992..25b4aa103029 100644 +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -521,6 +521,16 @@ static inline int pci_channel_offline(struct pci_dev *pdev) + return (pdev->error_state != pci_channel_io_normal); + } + ++/* ++ * Currently in ACPI spec, for each PCI host bridge, PCI Segment ++ * Group number is limited to a 16-bit value, therefore (int)-1 is ++ * not a valid PCI domain number, and can be used as a sentinel ++ * value indicating ->domain_nr is not set by the driver (and ++ * CONFIG_PCI_DOMAINS_GENERIC=y archs will set it with ++ * pci_bus_find_domain_nr()). 
++ */ ++#define PCI_DOMAIN_NR_NOT_SET (-1) ++ + struct pci_host_bridge { + struct device dev; + struct pci_bus *bus; /* Root bus */ +@@ -528,6 +538,7 @@ struct pci_host_bridge { + struct pci_ops *child_ops; + void *sysdata; + int busnr; ++ int domain_nr; + struct list_head windows; /* resource_entry */ + struct list_head dma_ranges; /* dma ranges resource list */ + u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* Platform IRQ swizzler */ +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Boqun Feng +Date: Tue, 27 Jul 2021 02:06:51 +0800 +Subject: [PATCH 34/53] PCI: Support populating MSI domains of root buses via + bridges + +Currently, at probing time, the MSI domains of root buses are populated +if either the information of MSI domain is available from firmware (DT +or ACPI), or arch-specific sysdata is used to pass the fwnode of the MSI +domain. These two conditions don't cover all, e.g. Hyper-V virtual PCI +on ARM64, which doesn't have the MSI information in the firmware and +couldn't use arch-specific sysdata because running on an architecture +with PCI_DOMAINS_GENERIC=y. + +To support populating MSI domains of the root buses at the probing when +neither of the above conditions is true, the ->msi_domain of the +corresponding bridge device is used: in pci_host_bridge_msi_domain(), +which should return the MSI domain of the root bus, the ->msi_domain of +the corresponding bridge is fetched first as a potential value of the +MSI domain of the root bus. + +In order to use the approach to populate MSI domains, the driver needs +to dev_set_msi_domain() on the bridge before calling +pci_register_host_bridge(), and make sure GENERIC_MSI_IRQ_DOMAIN=y. + +Another advantage of this new approach is providing an arch-independent +way to populate MSI domains, which allows sharing the driver code as +much as possible between architectures.
+ +Originally-by: Arnd Bergmann +Link: https://lore.kernel.org/r/20210726180657.142727-3-boqun.feng@gmail.com +Signed-off-by: Boqun Feng +Signed-off-by: Lorenzo Pieralisi +Acked-by: Bjorn Helgaas +(cherry picked from commit 41dd40fd717997085588442821f4463e05c758cf) +--- + drivers/pci/probe.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c +index 7e4cb5f7c9ca..a49ca043ded8 100644 +--- a/drivers/pci/probe.c ++++ b/drivers/pci/probe.c +@@ -828,11 +828,15 @@ static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus) + { + struct irq_domain *d; + ++ /* If the host bridge driver sets a MSI domain of the bridge, use it */ ++ d = dev_get_msi_domain(bus->bridge); ++ + /* + * Any firmware interface that can resolve the msi_domain + * should be called from here. + */ +- d = pci_host_bridge_of_msi_domain(bus); ++ if (!d) ++ d = pci_host_bridge_of_msi_domain(bus); + if (!d) + d = pci_host_bridge_acpi_msi_domain(bus); + +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 4 Aug 2021 08:52:39 -0700 +Subject: [PATCH 35/53] Drivers: hv: Enable Hyper-V code to be built on ARM64 + +Update drivers/hv/Kconfig so CONFIG_HYPERV can be selected on +ARM64, causing the Hyper-V specific code to be built. Exclude the +Hyper-V enlightened clocks/timers code from being built for ARM64. 
+ +Signed-off-by: Michael Kelley +Reviewed-by: Boqun Feng +Acked-by: Marc Zyngier +Acked-by: Mark Rutland +Acked-by: Catalin Marinas +Link: https://lore.kernel.org/r/1628092359-61351-6-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 7aff79e297ee1aa0126924921fd87a4ae59d2467) +--- + drivers/hv/Kconfig | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig +index 66c794d92391..d1123ceb38f3 100644 +--- a/drivers/hv/Kconfig ++++ b/drivers/hv/Kconfig +@@ -4,15 +4,16 @@ menu "Microsoft Hyper-V guest support" + + config HYPERV + tristate "Microsoft Hyper-V client drivers" +- depends on X86 && ACPI && X86_LOCAL_APIC && HYPERVISOR_GUEST ++ depends on ACPI && ((X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \ ++ || (ARM64 && !CPU_BIG_ENDIAN)) + select PARAVIRT +- select X86_HV_CALLBACK_VECTOR ++ select X86_HV_CALLBACK_VECTOR if X86 + help + Select this option to run Linux as a Hyper-V client operating + system. + + config HYPERV_TIMER +- def_bool HYPERV ++ def_bool HYPERV && X86 + + config HYPERV_UTILS + tristate "Microsoft Hyper-V Utilities driver" +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 4 Aug 2021 08:52:35 -0700 +Subject: [PATCH 36/53] arm64: hyperv: Add Hyper-V hypercall and register + access utilities + +hyperv-tlfs.h defines Hyper-V interfaces from the Hyper-V Top Level +Functional Spec (TLFS), and #includes the architecture-independent +part of hyperv-tlfs.h in include/asm-generic. The published TLFS +is distinctly oriented to x86/x64, so the ARM64-specific +hyperv-tlfs.h includes information for ARM64 that is not yet formally +published. 
The TLFS is available here: + + docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs + +mshyperv.h defines Linux-specific structures and routines for +interacting with Hyper-V on ARM64, and #includes the architecture- +independent part of mshyperv.h in include/asm-generic. + +Use these definitions to provide utility functions to make +Hyper-V hypercalls and to get and set Hyper-V provided +registers associated with a virtual processor. + +Signed-off-by: Michael Kelley +Reviewed-by: Sunil Muthuswamy +Acked-by: Marc Zyngier +Acked-by: Mark Rutland +Acked-by: Catalin Marinas +Link: https://lore.kernel.org/r/1628092359-61351-2-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 57d276bbbd322409bb6f7c6446187a29953f8ded) +--- + MAINTAINERS | 3 + + arch/arm64/Kbuild | 1 + + arch/arm64/hyperv/Makefile | 2 + + arch/arm64/hyperv/hv_core.c | 129 +++++++++++++++++++++++++++ + arch/arm64/include/asm/hyperv-tlfs.h | 69 ++++++++++++++ + arch/arm64/include/asm/mshyperv.h | 54 +++++++++++ + 6 files changed, 258 insertions(+) + create mode 100644 arch/arm64/hyperv/Makefile + create mode 100644 arch/arm64/hyperv/hv_core.c + create mode 100644 arch/arm64/include/asm/hyperv-tlfs.h + create mode 100644 arch/arm64/include/asm/mshyperv.h + +diff --git a/MAINTAINERS b/MAINTAINERS +index 4fef10dd2975..aad9e6c8af38 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -8154,6 +8154,9 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git + F: Documentation/ABI/stable/sysfs-bus-vmbus + F: Documentation/ABI/testing/debugfs-hyperv + F: Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst ++F: arch/arm64/hyperv ++F: arch/arm64/include/asm/hyperv-tlfs.h ++F: arch/arm64/include/asm/mshyperv.h + F: arch/x86/hyperv + F: arch/x86/include/asm/hyperv-tlfs.h + F: arch/x86/include/asm/mshyperv.h +diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild +index d6465823b281..7a37608fed76 100644 +--- a/arch/arm64/Kbuild ++++ 
b/arch/arm64/Kbuild +@@ -3,4 +3,5 @@ obj-y += kernel/ mm/ + obj-$(CONFIG_NET) += net/ + obj-$(CONFIG_KVM) += kvm/ + obj-$(CONFIG_XEN) += xen/ ++obj-$(subst m,y,$(CONFIG_HYPERV)) += hyperv/ + obj-$(CONFIG_CRYPTO) += crypto/ +diff --git a/arch/arm64/hyperv/Makefile b/arch/arm64/hyperv/Makefile +new file mode 100644 +index 000000000000..1697d30ff106 +--- /dev/null ++++ b/arch/arm64/hyperv/Makefile +@@ -0,0 +1,2 @@ ++# SPDX-License-Identifier: GPL-2.0 ++obj-y := hv_core.o +diff --git a/arch/arm64/hyperv/hv_core.c b/arch/arm64/hyperv/hv_core.c +new file mode 100644 +index 000000000000..4c5dc0f51b12 +--- /dev/null ++++ b/arch/arm64/hyperv/hv_core.c +@@ -0,0 +1,129 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++/* ++ * Low level utility routines for interacting with Hyper-V. ++ * ++ * Copyright (C) 2021, Microsoft, Inc. ++ * ++ * Author : Michael Kelley ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * hv_do_hypercall- Invoke the specified hypercall ++ */ ++u64 hv_do_hypercall(u64 control, void *input, void *output) ++{ ++ struct arm_smccc_res res; ++ u64 input_address; ++ u64 output_address; ++ ++ input_address = input ? virt_to_phys(input) : 0; ++ output_address = output ? virt_to_phys(output) : 0; ++ ++ arm_smccc_1_1_hvc(HV_FUNC_ID, control, ++ input_address, output_address, &res); ++ return res.a0; ++} ++EXPORT_SYMBOL_GPL(hv_do_hypercall); ++ ++/* ++ * hv_do_fast_hypercall8 -- Invoke the specified hypercall ++ * with arguments in registers instead of physical memory. ++ * Avoids the overhead of virt_to_phys for simple hypercalls. ++ */ ++ ++u64 hv_do_fast_hypercall8(u16 code, u64 input) ++{ ++ struct arm_smccc_res res; ++ u64 control; ++ ++ control = (u64)code | HV_HYPERCALL_FAST_BIT; ++ ++ arm_smccc_1_1_hvc(HV_FUNC_ID, control, input, &res); ++ return res.a0; ++} ++EXPORT_SYMBOL_GPL(hv_do_fast_hypercall8); ++ ++/* ++ * Set a single VP register to a 64-bit value. 
++ */ ++void hv_set_vpreg(u32 msr, u64 value) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_hvc(HV_FUNC_ID, ++ HVCALL_SET_VP_REGISTERS | HV_HYPERCALL_FAST_BIT | ++ HV_HYPERCALL_REP_COMP_1, ++ HV_PARTITION_ID_SELF, ++ HV_VP_INDEX_SELF, ++ msr, ++ 0, ++ value, ++ 0, ++ &res); ++ ++ /* ++ * Something is fundamentally broken in the hypervisor if ++ * setting a VP register fails. There's really no way to ++ * continue as a guest VM, so panic. ++ */ ++ BUG_ON(!hv_result_success(res.a0)); ++} ++EXPORT_SYMBOL_GPL(hv_set_vpreg); ++ ++/* ++ * Get the value of a single VP register. One version ++ * returns just 64 bits and another returns the full 128 bits. ++ * The two versions are separate to avoid complicating the ++ * calling sequence for the more frequently used 64 bit version. ++ */ ++ ++void hv_get_vpreg_128(u32 msr, struct hv_get_vp_registers_output *result) ++{ ++ struct arm_smccc_1_2_regs args; ++ struct arm_smccc_1_2_regs res; ++ ++ args.a0 = HV_FUNC_ID; ++ args.a1 = HVCALL_GET_VP_REGISTERS | HV_HYPERCALL_FAST_BIT | ++ HV_HYPERCALL_REP_COMP_1; ++ args.a2 = HV_PARTITION_ID_SELF; ++ args.a3 = HV_VP_INDEX_SELF; ++ args.a4 = msr; ++ ++ /* ++ * Use the SMCCC 1.2 interface because the results are in registers ++ * beyond X0-X3. ++ */ ++ arm_smccc_1_2_hvc(&args, &res); ++ ++ /* ++ * Something is fundamentally broken in the hypervisor if ++ * getting a VP register fails. There's really no way to ++ * continue as a guest VM, so panic. 
++ */ ++ BUG_ON(!hv_result_success(res.a0)); ++ ++ result->as64.low = res.a6; ++ result->as64.high = res.a7; ++} ++EXPORT_SYMBOL_GPL(hv_get_vpreg_128); ++ ++u64 hv_get_vpreg(u32 msr) ++{ ++ struct hv_get_vp_registers_output output; ++ ++ hv_get_vpreg_128(msr, &output); ++ ++ return output.as64.low; ++} ++EXPORT_SYMBOL_GPL(hv_get_vpreg); +diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h +new file mode 100644 +index 000000000000..4d964a7f02ee +--- /dev/null ++++ b/arch/arm64/include/asm/hyperv-tlfs.h +@@ -0,0 +1,69 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++ ++/* ++ * This file contains definitions from the Hyper-V Hypervisor Top-Level ++ * Functional Specification (TLFS): ++ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs ++ * ++ * Copyright (C) 2021, Microsoft, Inc. ++ * ++ * Author : Michael Kelley ++ */ ++ ++#ifndef _ASM_HYPERV_TLFS_H ++#define _ASM_HYPERV_TLFS_H ++ ++#include ++ ++/* ++ * All data structures defined in the TLFS that are shared between Hyper-V ++ * and a guest VM use Little Endian byte ordering. This matches the default ++ * byte ordering of Linux running on ARM64, so no special handling is required. ++ */ ++ ++/* ++ * These Hyper-V registers provide information equivalent to the CPUID ++ * instruction on x86/x64. ++ */ ++#define HV_REGISTER_HYPERVISOR_VERSION 0x00000100 /*CPUID 0x40000002 */ ++#define HV_REGISTER_FEATURES 0x00000200 /*CPUID 0x40000003 */ ++#define HV_REGISTER_ENLIGHTENMENTS 0x00000201 /*CPUID 0x40000004 */ ++ ++/* ++ * Group C Features. See the asm-generic version of hyperv-tlfs.h ++ * for a description of Feature Groups. 
++ */ ++ ++/* Crash MSRs available */ ++#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(8) ++ ++/* STIMER direct mode is available */ ++#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(13) ++ ++/* ++ * Synthetic register definitions equivalent to MSRs on x86/x64 ++ */ ++#define HV_REGISTER_CRASH_P0 0x00000210 ++#define HV_REGISTER_CRASH_P1 0x00000211 ++#define HV_REGISTER_CRASH_P2 0x00000212 ++#define HV_REGISTER_CRASH_P3 0x00000213 ++#define HV_REGISTER_CRASH_P4 0x00000214 ++#define HV_REGISTER_CRASH_CTL 0x00000215 ++ ++#define HV_REGISTER_GUEST_OSID 0x00090002 ++#define HV_REGISTER_VP_INDEX 0x00090003 ++#define HV_REGISTER_TIME_REF_COUNT 0x00090004 ++#define HV_REGISTER_REFERENCE_TSC 0x00090017 ++ ++#define HV_REGISTER_SINT0 0x000A0000 ++#define HV_REGISTER_SCONTROL 0x000A0010 ++#define HV_REGISTER_SIEFP 0x000A0012 ++#define HV_REGISTER_SIMP 0x000A0013 ++#define HV_REGISTER_EOM 0x000A0014 ++ ++#define HV_REGISTER_STIMER0_CONFIG 0x000B0000 ++#define HV_REGISTER_STIMER0_COUNT 0x000B0001 ++ ++#include ++ ++#endif +diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h +new file mode 100644 +index 000000000000..20070a847304 +--- /dev/null ++++ b/arch/arm64/include/asm/mshyperv.h +@@ -0,0 +1,54 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++ ++/* ++ * Linux-specific definitions for managing interactions with Microsoft's ++ * Hyper-V hypervisor. The definitions in this file are specific to ++ * the ARM64 architecture. See include/asm-generic/mshyperv.h for ++ * definitions that are architecture independent. ++ * ++ * Definitions that are specified in the Hyper-V Top Level Functional ++ * Spec (TLFS) should not go in this file, but should instead go in ++ * hyperv-tlfs.h. ++ * ++ * Copyright (C) 2021, Microsoft, Inc. 
++ * ++ * Author : Michael Kelley ++ */ ++ ++#ifndef _ASM_MSHYPERV_H ++#define _ASM_MSHYPERV_H ++ ++#include ++#include ++#include ++ ++/* ++ * Declare calls to get and set Hyper-V VP register values on ARM64, which ++ * requires a hypercall. ++ */ ++ ++void hv_set_vpreg(u32 reg, u64 value); ++u64 hv_get_vpreg(u32 reg); ++void hv_get_vpreg_128(u32 reg, struct hv_get_vp_registers_output *result); ++ ++static inline void hv_set_register(unsigned int reg, u64 value) ++{ ++ hv_set_vpreg(reg, value); ++} ++ ++static inline u64 hv_get_register(unsigned int reg) ++{ ++ return hv_get_vpreg(reg); ++} ++ ++/* SMCCC hypercall parameters */ ++#define HV_SMCCC_FUNC_NUMBER 1 ++#define HV_FUNC_ID ARM_SMCCC_CALL_VAL( \ ++ ARM_SMCCC_STD_CALL, \ ++ ARM_SMCCC_SMC_64, \ ++ ARM_SMCCC_OWNER_VENDOR_HYP, \ ++ HV_SMCCC_FUNC_NUMBER) ++ ++#include ++ ++#endif +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 4 Aug 2021 08:52:37 -0700 +Subject: [PATCH 37/53] arm64: hyperv: Initialize hypervisor on boot + +Add ARM64-specific code to initialize the Hyper-V +hypervisor when booting as a guest VM. + +This code is built only when CONFIG_HYPERV is enabled. 
+ +Signed-off-by: Michael Kelley +Acked-by: Marc Zyngier +Acked-by: Mark Rutland +Acked-by: Catalin Marinas +Link: https://lore.kernel.org/r/1628092359-61351-4-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 9bbb888824e38cc2e9118ed351fe3d22403a73e1) +--- + arch/arm64/hyperv/Makefile | 2 +- + arch/arm64/hyperv/mshyperv.c | 87 ++++++++++++++++++++++++++++++++++++ + 2 files changed, 88 insertions(+), 1 deletion(-) + create mode 100644 arch/arm64/hyperv/mshyperv.c + +diff --git a/arch/arm64/hyperv/Makefile b/arch/arm64/hyperv/Makefile +index 1697d30ff106..87c31c001da9 100644 +--- a/arch/arm64/hyperv/Makefile ++++ b/arch/arm64/hyperv/Makefile +@@ -1,2 +1,2 @@ + # SPDX-License-Identifier: GPL-2.0 +-obj-y := hv_core.o ++obj-y := hv_core.o mshyperv.o +diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c +new file mode 100644 +index 000000000000..bbbe351e9045 +--- /dev/null ++++ b/arch/arm64/hyperv/mshyperv.c +@@ -0,0 +1,87 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++/* ++ * Core routines for interacting with Microsoft's Hyper-V hypervisor, ++ * including hypervisor initialization. ++ * ++ * Copyright (C) 2021, Microsoft, Inc. ++ * ++ * Author : Michael Kelley ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static bool hyperv_initialized; ++ ++static int __init hyperv_init(void) ++{ ++ struct hv_get_vp_registers_output result; ++ u32 a, b, c, d; ++ u64 guest_id; ++ int ret; ++ ++ /* ++ * Allow for a kernel built with CONFIG_HYPERV to be running in ++ * a non-Hyper-V environment, including on DT instead of ACPI. ++ * In such cases, do nothing and return success. 
++ */ ++ if (acpi_disabled) ++ return 0; ++ ++ if (strncmp((char *)&acpi_gbl_FADT.hypervisor_id, "MsHyperV", 8)) ++ return 0; ++ ++ /* Setup the guest ID */ ++ guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); ++ hv_set_vpreg(HV_REGISTER_GUEST_OSID, guest_id); ++ ++ /* Get the features and hints from Hyper-V */ ++ hv_get_vpreg_128(HV_REGISTER_FEATURES, &result); ++ ms_hyperv.features = result.as32.a; ++ ms_hyperv.priv_high = result.as32.b; ++ ms_hyperv.misc_features = result.as32.c; ++ ++ hv_get_vpreg_128(HV_REGISTER_ENLIGHTENMENTS, &result); ++ ms_hyperv.hints = result.as32.a; ++ ++ pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n", ++ ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints, ++ ms_hyperv.misc_features); ++ ++ /* Get information about the Hyper-V host version */ ++ hv_get_vpreg_128(HV_REGISTER_HYPERVISOR_VERSION, &result); ++ a = result.as32.a; ++ b = result.as32.b; ++ c = result.as32.c; ++ d = result.as32.d; ++ pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n", ++ b >> 16, b & 0xFFFF, a, d & 0xFFFFFF, c, d >> 24); ++ ++ ret = hv_common_init(); ++ if (ret) ++ return ret; ++ ++ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm64/hyperv_init:online", ++ hv_common_cpu_init, hv_common_cpu_die); ++ if (ret < 0) { ++ hv_common_free(); ++ return ret; ++ } ++ ++ hyperv_initialized = true; ++ return 0; ++} ++ ++early_initcall(hyperv_init); ++ ++bool hv_is_hyperv_initialized(void) ++{ ++ return hyperv_initialized; ++} ++EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 4 Aug 2021 08:52:36 -0700 +Subject: [PATCH 38/53] arm64: hyperv: Add panic handler + +Add a function to inform Hyper-V about a guest panic. + +This code is built only when CONFIG_HYPERV is enabled. 
+ +Signed-off-by: Michael Kelley +Reviewed-by: Sunil Muthuswamy +Reviewed-by: Boqun Feng +Acked-by: Marc Zyngier +Acked-by: Mark Rutland +Acked-by: Catalin Marinas +Link: https://lore.kernel.org/r/1628092359-61351-3-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 512c1117fb2eeb944df1b88bff6e0c002990b369) +--- + arch/arm64/hyperv/hv_core.c | 52 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 52 insertions(+) + +diff --git a/arch/arm64/hyperv/hv_core.c b/arch/arm64/hyperv/hv_core.c +index 4c5dc0f51b12..b54c34793701 100644 +--- a/arch/arm64/hyperv/hv_core.c ++++ b/arch/arm64/hyperv/hv_core.c +@@ -127,3 +127,55 @@ u64 hv_get_vpreg(u32 msr) + return output.as64.low; + } + EXPORT_SYMBOL_GPL(hv_get_vpreg); ++ ++/* ++ * hyperv_report_panic - report a panic to Hyper-V. This function uses ++ * the older version of the Hyper-V interface that admittedly doesn't ++ * pass enough information to be useful beyond just recording the ++ * occurrence of a panic. The parallel hv_kmsg_dump() uses the ++ * new interface that allows reporting 4 Kbytes of data, which is much ++ * more useful. Hyper-V on ARM64 always supports the newer interface, but ++ * we retain support for the older version because the sysadmin is allowed ++ * to disable the newer version via sysctl in case of information security ++ * concerns about the more verbose version. ++ */ ++void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) ++{ ++ static bool panic_reported; ++ u64 guest_id; ++ ++ /* Don't report a panic to Hyper-V if we're not going to panic */ ++ if (in_die && !panic_on_oops) ++ return; ++ ++ /* ++ * We prefer to report panic on 'die' chain as we have proper ++ * registers to report, but if we miss it (e.g. on BUG()) we need ++ * to report it on 'panic'. ++ * ++ * Calling code in the 'die' and 'panic' paths ensures that only ++ * one CPU is running this code, so no atomicity is needed. 
++ */ ++ if (panic_reported) ++ return; ++ panic_reported = true; ++ ++ guest_id = hv_get_vpreg(HV_REGISTER_GUEST_OSID); ++ ++ /* ++ * Hyper-V provides the ability to store only 5 values. ++ * Pick the passed in error value, the guest_id, the PC, ++ * and the SP. ++ */ ++ hv_set_vpreg(HV_REGISTER_CRASH_P0, err); ++ hv_set_vpreg(HV_REGISTER_CRASH_P1, guest_id); ++ hv_set_vpreg(HV_REGISTER_CRASH_P2, regs->pc); ++ hv_set_vpreg(HV_REGISTER_CRASH_P3, regs->sp); ++ hv_set_vpreg(HV_REGISTER_CRASH_P4, 0); ++ ++ /* ++ * Let Hyper-V know there is crash data available ++ */ ++ hv_set_vpreg(HV_REGISTER_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); ++} ++EXPORT_SYMBOL_GPL(hyperv_report_panic); +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 4 Aug 2021 08:52:38 -0700 +Subject: [PATCH 39/53] arm64: efi: Export screen_info + +The Hyper-V frame buffer driver may be built as a module, and +it needs access to screen_info. So export screen_info. 
+ +Signed-off-by: Michael Kelley +Acked-by: Ard Biesheuvel +Acked-by: Marc Zyngier +Acked-by: Mark Rutland +Acked-by: Catalin Marinas +Link: https://lore.kernel.org/r/1628092359-61351-5-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 9b16c2132f34316bf0b59d24357a788cc1e9e352) +--- + arch/arm64/kernel/efi.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c +index fa02efb28e88..e1be6c429810 100644 +--- a/arch/arm64/kernel/efi.c ++++ b/arch/arm64/kernel/efi.c +@@ -55,6 +55,7 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) + + /* we will fill this structure from the stub, so don't put it in .bss */ + struct screen_info screen_info __section(".data"); ++EXPORT_SYMBOL(screen_info); + + int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) + { +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Boqun Feng +Date: Tue, 27 Jul 2021 02:06:57 +0800 +Subject: [PATCH 40/53] PCI: hv: Turn on the host bridge probing on ARM64 + +Now we have everything we need, just provide a proper sysdata type for +the bus to use on ARM64 and everything else works. 
+ +Link: https://lore.kernel.org/r/20210726180657.142727-9-boqun.feng@gmail.com +Signed-off-by: Boqun Feng +Signed-off-by: Lorenzo Pieralisi +(cherry picked from commit 88f94c7f8f40d7e26f991f6f6ed914ff44361d75) +--- + drivers/pci/controller/pci-hyperv.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c +index f9dc82258fdc..6992e3e89768 100644 +--- a/drivers/pci/controller/pci-hyperv.c ++++ b/drivers/pci/controller/pci-hyperv.c +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -448,7 +449,11 @@ enum hv_pcibus_state { + }; + + struct hv_pcibus_device { ++#ifdef CONFIG_X86 + struct pci_sysdata sysdata; ++#elif defined(CONFIG_ARM64) ++ struct pci_config_window sysdata; ++#endif + struct pci_host_bridge *bridge; + /* Protocol version negotiated with the host */ + enum pci_protocol_version_t protocol_version; +@@ -3076,7 +3081,9 @@ static int hv_pci_probe(struct hv_device *hdev, + dom_req, dom); + + hbus->bridge->domain_nr = dom; ++#ifdef CONFIG_X86 + hbus->sysdata.domain = dom; ++#endif + + hbus->hdev = hdev; + INIT_LIST_HEAD(&hbus->children); +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Matheus Castello +Date: Wed, 25 Nov 2020 00:29:26 -0300 +Subject: [PATCH 41/53] drivers: hv: vmbus: Fix checkpatch SPLIT_STRING + +Checkpatch emits WARNING: quoted string split across lines. +To keep the code clean and with the 80 column length indentation the +check and registration code for kmsg_dump_register has been transferred +to a new function hv_kmsg_dump_register. 
+ +Signed-off-by: Matheus Castello +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20201125032926.17002-1-matheus@castello.eng.br +Signed-off-by: Wei Liu +(cherry picked from commit b0c03eff79a67aa43f17249dd42fac58e96718dc) +--- + drivers/hv/vmbus_drv.c | 35 ++++++++++++++++++++--------------- + 1 file changed, 20 insertions(+), 15 deletions(-) + +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index 4f9a1d12aa88..7551b5c11a98 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -1398,6 +1398,24 @@ static struct kmsg_dumper hv_kmsg_dumper = { + .dump = hv_kmsg_dump, + }; + ++static void hv_kmsg_dump_register(void) ++{ ++ int ret; ++ ++ hv_panic_page = hv_alloc_hyperv_zeroed_page(); ++ if (!hv_panic_page) { ++ pr_err("Hyper-V: panic message page memory allocation failed\n"); ++ return; ++ } ++ ++ ret = kmsg_dump_register(&hv_kmsg_dumper); ++ if (ret) { ++ pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret); ++ hv_free_hyperv_page((unsigned long)hv_panic_page); ++ hv_panic_page = NULL; ++ } ++} ++ + static struct ctl_table_header *hv_ctl_table_hdr; + + /* +@@ -1488,21 +1506,8 @@ static int vmbus_bus_init(void) + * capability is supported by the hypervisor. 
+ */ + hv_get_crash_ctl(hyperv_crash_ctl); +- if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) { +- hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page(); +- if (hv_panic_page) { +- ret = kmsg_dump_register(&hv_kmsg_dumper); +- if (ret) { +- pr_err("Hyper-V: kmsg dump register " +- "error 0x%x\n", ret); +- hv_free_hyperv_page( +- (unsigned long)hv_panic_page); +- hv_panic_page = NULL; +- } +- } else +- pr_err("Hyper-V: panic message page memory " +- "allocation failed"); +- } ++ if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) ++ hv_kmsg_dump_register(); + + register_die_notifier(&hyperv_die_block); + } +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Tue, 2 Mar 2021 13:38:15 -0800 +Subject: [PATCH 42/53] Drivers: hv: Redo Hyper-V synthetic MSR get/set + functions + +Current code defines a separate get and set macro for each Hyper-V +synthetic MSR used by the VMbus driver. Furthermore, the get macro +can't be converted to a standard function because the second argument +is modified in place, which is somewhat bad form. + +Redo this by providing a single get and a single set function that +take a parameter specifying the MSR to be operated on. Fixup usage +of the get function. Calling locations are no more complex than before, +but the code under arch/x86 and the upcoming code under arch/arm64 +is significantly simplified. + +Also standardize the names of Hyper-V synthetic MSRs that are +architecture neutral. But keep the old x86-specific names as aliases +that can be removed later when all references (particularly in KVM +code) have been cleaned up in a separate patch series. + +No functional change. 
+ +Signed-off-by: Michael Kelley +Reviewed-by: Boqun Feng +Link: https://lore.kernel.org/r/1614721102-2241-4-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit f3c5e63c3690fc64e5a7a2b3e4f9f5ff1fa25584) +--- + arch/x86/hyperv/hv_init.c | 2 +- + arch/x86/include/asm/hyperv-tlfs.h | 102 ++++++++++++++++++----------- + arch/x86/include/asm/mshyperv.h | 39 +++-------- + drivers/clocksource/hyperv_timer.c | 26 ++++---- + drivers/hv/hv.c | 37 ++++++----- + drivers/hv/vmbus_drv.c | 2 +- + include/asm-generic/mshyperv.h | 2 +- + 7 files changed, 110 insertions(+), 100 deletions(-) + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index f879eb81b06d..48a5c45c09cb 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -75,7 +75,7 @@ static int hv_cpu_init(unsigned int cpu) + *output_arg = page_address(pg + 1); + } + +- hv_get_vp_index(msr_vp_index); ++ msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); + + hv_vp_index[smp_processor_id()] = msr_vp_index; + +diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h +index 119cc587775a..89eb0a885319 100644 +--- a/arch/x86/include/asm/hyperv-tlfs.h ++++ b/arch/x86/include/asm/hyperv-tlfs.h +@@ -149,7 +149,7 @@ enum hv_isolation_type { + #define HV_X64_MSR_HYPERCALL 0x40000001 + + /* MSR used to provide vcpu index */ +-#define HV_X64_MSR_VP_INDEX 0x40000002 ++#define HV_REGISTER_VP_INDEX 0x40000002 + + /* MSR used to reset the guest OS. 
*/ + #define HV_X64_MSR_RESET 0x40000003 +@@ -158,10 +158,10 @@ enum hv_isolation_type { + #define HV_X64_MSR_VP_RUNTIME 0x40000010 + + /* MSR used to read the per-partition time reference counter */ +-#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 ++#define HV_REGISTER_TIME_REF_COUNT 0x40000020 + + /* A partition's reference time stamp counter (TSC) page */ +-#define HV_X64_MSR_REFERENCE_TSC 0x40000021 ++#define HV_REGISTER_REFERENCE_TSC 0x40000021 + + /* MSR used to retrieve the TSC frequency */ + #define HV_X64_MSR_TSC_FREQUENCY 0x40000022 +@@ -176,50 +176,50 @@ enum hv_isolation_type { + #define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 + + /* Define synthetic interrupt controller model specific registers. */ +-#define HV_X64_MSR_SCONTROL 0x40000080 +-#define HV_X64_MSR_SVERSION 0x40000081 +-#define HV_X64_MSR_SIEFP 0x40000082 +-#define HV_X64_MSR_SIMP 0x40000083 +-#define HV_X64_MSR_EOM 0x40000084 +-#define HV_X64_MSR_SINT0 0x40000090 +-#define HV_X64_MSR_SINT1 0x40000091 +-#define HV_X64_MSR_SINT2 0x40000092 +-#define HV_X64_MSR_SINT3 0x40000093 +-#define HV_X64_MSR_SINT4 0x40000094 +-#define HV_X64_MSR_SINT5 0x40000095 +-#define HV_X64_MSR_SINT6 0x40000096 +-#define HV_X64_MSR_SINT7 0x40000097 +-#define HV_X64_MSR_SINT8 0x40000098 +-#define HV_X64_MSR_SINT9 0x40000099 +-#define HV_X64_MSR_SINT10 0x4000009A +-#define HV_X64_MSR_SINT11 0x4000009B +-#define HV_X64_MSR_SINT12 0x4000009C +-#define HV_X64_MSR_SINT13 0x4000009D +-#define HV_X64_MSR_SINT14 0x4000009E +-#define HV_X64_MSR_SINT15 0x4000009F ++#define HV_REGISTER_SCONTROL 0x40000080 ++#define HV_REGISTER_SVERSION 0x40000081 ++#define HV_REGISTER_SIEFP 0x40000082 ++#define HV_REGISTER_SIMP 0x40000083 ++#define HV_REGISTER_EOM 0x40000084 ++#define HV_REGISTER_SINT0 0x40000090 ++#define HV_REGISTER_SINT1 0x40000091 ++#define HV_REGISTER_SINT2 0x40000092 ++#define HV_REGISTER_SINT3 0x40000093 ++#define HV_REGISTER_SINT4 0x40000094 ++#define HV_REGISTER_SINT5 0x40000095 ++#define HV_REGISTER_SINT6 0x40000096 
++#define HV_REGISTER_SINT7 0x40000097 ++#define HV_REGISTER_SINT8 0x40000098 ++#define HV_REGISTER_SINT9 0x40000099 ++#define HV_REGISTER_SINT10 0x4000009A ++#define HV_REGISTER_SINT11 0x4000009B ++#define HV_REGISTER_SINT12 0x4000009C ++#define HV_REGISTER_SINT13 0x4000009D ++#define HV_REGISTER_SINT14 0x4000009E ++#define HV_REGISTER_SINT15 0x4000009F + + /* + * Synthetic Timer MSRs. Four timers per vcpu. + */ +-#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 +-#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 +-#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 +-#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 +-#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 +-#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 +-#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 +-#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 ++#define HV_REGISTER_STIMER0_CONFIG 0x400000B0 ++#define HV_REGISTER_STIMER0_COUNT 0x400000B1 ++#define HV_REGISTER_STIMER1_CONFIG 0x400000B2 ++#define HV_REGISTER_STIMER1_COUNT 0x400000B3 ++#define HV_REGISTER_STIMER2_CONFIG 0x400000B4 ++#define HV_REGISTER_STIMER2_COUNT 0x400000B5 ++#define HV_REGISTER_STIMER3_CONFIG 0x400000B6 ++#define HV_REGISTER_STIMER3_COUNT 0x400000B7 + + /* Hyper-V guest idle MSR */ + #define HV_X64_MSR_GUEST_IDLE 0x400000F0 + + /* Hyper-V guest crash notification MSR's */ +-#define HV_X64_MSR_CRASH_P0 0x40000100 +-#define HV_X64_MSR_CRASH_P1 0x40000101 +-#define HV_X64_MSR_CRASH_P2 0x40000102 +-#define HV_X64_MSR_CRASH_P3 0x40000103 +-#define HV_X64_MSR_CRASH_P4 0x40000104 +-#define HV_X64_MSR_CRASH_CTL 0x40000105 ++#define HV_REGISTER_CRASH_P0 0x40000100 ++#define HV_REGISTER_CRASH_P1 0x40000101 ++#define HV_REGISTER_CRASH_P2 0x40000102 ++#define HV_REGISTER_CRASH_P3 0x40000103 ++#define HV_REGISTER_CRASH_P4 0x40000104 ++#define HV_REGISTER_CRASH_CTL 0x40000105 + + /* TSC emulation after migration */ + #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 +@@ -229,6 +229,32 @@ enum hv_isolation_type { + /* TSC invariant control */ + #define 
HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118 + ++/* Register name aliases for temporary compatibility */ ++#define HV_X64_MSR_STIMER0_COUNT HV_REGISTER_STIMER0_COUNT ++#define HV_X64_MSR_STIMER0_CONFIG HV_REGISTER_STIMER0_CONFIG ++#define HV_X64_MSR_STIMER1_COUNT HV_REGISTER_STIMER1_COUNT ++#define HV_X64_MSR_STIMER1_CONFIG HV_REGISTER_STIMER1_CONFIG ++#define HV_X64_MSR_STIMER2_COUNT HV_REGISTER_STIMER2_COUNT ++#define HV_X64_MSR_STIMER2_CONFIG HV_REGISTER_STIMER2_CONFIG ++#define HV_X64_MSR_STIMER3_COUNT HV_REGISTER_STIMER3_COUNT ++#define HV_X64_MSR_STIMER3_CONFIG HV_REGISTER_STIMER3_CONFIG ++#define HV_X64_MSR_SCONTROL HV_REGISTER_SCONTROL ++#define HV_X64_MSR_SVERSION HV_REGISTER_SVERSION ++#define HV_X64_MSR_SIMP HV_REGISTER_SIMP ++#define HV_X64_MSR_SIEFP HV_REGISTER_SIEFP ++#define HV_X64_MSR_VP_INDEX HV_REGISTER_VP_INDEX ++#define HV_X64_MSR_EOM HV_REGISTER_EOM ++#define HV_X64_MSR_SINT0 HV_REGISTER_SINT0 ++#define HV_X64_MSR_SINT15 HV_REGISTER_SINT15 ++#define HV_X64_MSR_CRASH_P0 HV_REGISTER_CRASH_P0 ++#define HV_X64_MSR_CRASH_P1 HV_REGISTER_CRASH_P1 ++#define HV_X64_MSR_CRASH_P2 HV_REGISTER_CRASH_P2 ++#define HV_X64_MSR_CRASH_P3 HV_REGISTER_CRASH_P3 ++#define HV_X64_MSR_CRASH_P4 HV_REGISTER_CRASH_P4 ++#define HV_X64_MSR_CRASH_CTL HV_REGISTER_CRASH_CTL ++#define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT ++#define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC ++ + /* + * Declare the MSR used to setup pages used to communicate with the hypervisor. 
+ */ +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index a34f31dd5d93..ec7928d0120b 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -15,41 +15,22 @@ typedef int (*hyperv_fill_flush_list_func)( + struct hv_guest_mapping_flush_list *flush, + void *data); + +-#define hv_init_timer(timer, tick) \ +- wrmsrl(HV_X64_MSR_STIMER0_COUNT + (2*timer), tick) +-#define hv_init_timer_config(timer, val) \ +- wrmsrl(HV_X64_MSR_STIMER0_CONFIG + (2*timer), val) +- +-#define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val) +-#define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val) +- +-#define hv_get_siefp(val) rdmsrl(HV_X64_MSR_SIEFP, val) +-#define hv_set_siefp(val) wrmsrl(HV_X64_MSR_SIEFP, val) +- +-#define hv_get_synic_state(val) rdmsrl(HV_X64_MSR_SCONTROL, val) +-#define hv_set_synic_state(val) wrmsrl(HV_X64_MSR_SCONTROL, val) ++static inline void hv_set_register(unsigned int reg, u64 value) ++{ ++ wrmsrl(reg, value); ++} + +-#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index) ++static inline u64 hv_get_register(unsigned int reg) ++{ ++ u64 value; + +-#define hv_signal_eom() wrmsrl(HV_X64_MSR_EOM, 0) ++ rdmsrl(reg, value); ++ return value; ++} + +-#define hv_get_synint_state(int_num, val) \ +- rdmsrl(HV_X64_MSR_SINT0 + int_num, val) +-#define hv_set_synint_state(int_num, val) \ +- wrmsrl(HV_X64_MSR_SINT0 + int_num, val) + #define hv_recommend_using_aeoi() \ + (!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)) + +-#define hv_get_crash_ctl(val) \ +- rdmsrl(HV_X64_MSR_CRASH_CTL, val) +- +-#define hv_get_time_ref_count(val) \ +- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val) +- +-#define hv_get_reference_tsc(val) \ +- rdmsrl(HV_X64_MSR_REFERENCE_TSC, val) +-#define hv_set_reference_tsc(val) \ +- wrmsrl(HV_X64_MSR_REFERENCE_TSC, val) + #define hv_set_clocksource_vdso(val) \ + ((val).vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK) + #define hv_enable_vdso_clocksource() \ +diff --git 
a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c +index c97e1b1e6653..bbda003f8a1c 100644 +--- a/drivers/clocksource/hyperv_timer.c ++++ b/drivers/clocksource/hyperv_timer.c +@@ -68,14 +68,14 @@ static int hv_ce_set_next_event(unsigned long delta, + + current_tick = hv_read_reference_counter(); + current_tick += delta; +- hv_init_timer(0, current_tick); ++ hv_set_register(HV_REGISTER_STIMER0_COUNT, current_tick); + return 0; + } + + static int hv_ce_shutdown(struct clock_event_device *evt) + { +- hv_init_timer(0, 0); +- hv_init_timer_config(0, 0); ++ hv_set_register(HV_REGISTER_STIMER0_COUNT, 0); ++ hv_set_register(HV_REGISTER_STIMER0_CONFIG, 0); + if (direct_mode_enabled) + hv_disable_stimer0_percpu_irq(stimer0_irq); + +@@ -105,7 +105,7 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt) + timer_cfg.direct_mode = 0; + timer_cfg.sintx = stimer0_message_sint; + } +- hv_init_timer_config(0, timer_cfg.as_uint64); ++ hv_set_register(HV_REGISTER_STIMER0_CONFIG, timer_cfg.as_uint64); + return 0; + } + +@@ -323,7 +323,7 @@ static u64 notrace read_hv_clock_tsc(void) + u64 current_tick = hv_read_tsc_page(hv_get_tsc_page()); + + if (current_tick == U64_MAX) +- hv_get_time_ref_count(current_tick); ++ current_tick = hv_get_register(HV_REGISTER_TIME_REF_COUNT); + + return current_tick; + } +@@ -344,9 +344,9 @@ static void suspend_hv_clock_tsc(struct clocksource *arg) + u64 tsc_msr; + + /* Disable the TSC page */ +- hv_get_reference_tsc(tsc_msr); ++ tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); + tsc_msr &= ~BIT_ULL(0); +- hv_set_reference_tsc(tsc_msr); ++ hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); + } + + +@@ -356,10 +356,10 @@ static void resume_hv_clock_tsc(struct clocksource *arg) + u64 tsc_msr; + + /* Re-enable the TSC page */ +- hv_get_reference_tsc(tsc_msr); ++ tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); + tsc_msr &= GENMASK_ULL(11, 0); + tsc_msr |= BIT_ULL(0) | (u64)phys_addr; +- 
hv_set_reference_tsc(tsc_msr); ++ hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); + } + + static int hv_cs_enable(struct clocksource *cs) +@@ -381,14 +381,12 @@ static struct clocksource hyperv_cs_tsc = { + + static u64 notrace read_hv_clock_msr(void) + { +- u64 current_tick; + /* + * Read the partition counter to get the current tick count. This count + * is set to 0 when the partition is created and is incremented in + * 100 nanosecond units. + */ +- hv_get_time_ref_count(current_tick); +- return current_tick; ++ return hv_get_register(HV_REGISTER_TIME_REF_COUNT); + } + + static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg) +@@ -466,10 +464,10 @@ static bool __init hv_init_tsc_clocksource(void) + * (which already has at least the low 12 bits set to zero since + * it is page aligned). Also set the "enable" bit, which is bit 0. + */ +- hv_get_reference_tsc(tsc_msr); ++ tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); + tsc_msr &= GENMASK_ULL(11, 0); + tsc_msr = tsc_msr | 0x1 | (u64)phys_addr; +- hv_set_reference_tsc(tsc_msr); ++ hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); + + hv_set_clocksource_vdso(hyperv_cs_tsc); + clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); +diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c +index 3b1ba6c607a1..2fb794d2435d 100644 +--- a/drivers/hv/hv.c ++++ b/drivers/hv/hv.c +@@ -198,34 +198,36 @@ void hv_synic_enable_regs(unsigned int cpu) + union hv_synic_scontrol sctrl; + + /* Setup the Synic's message page */ +- hv_get_simp(simp.as_uint64); ++ simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP); + simp.simp_enabled = 1; + simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page) + >> HV_HYP_PAGE_SHIFT; + +- hv_set_simp(simp.as_uint64); ++ hv_set_register(HV_REGISTER_SIMP, simp.as_uint64); + + /* Setup the Synic's event page */ +- hv_get_siefp(siefp.as_uint64); ++ siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); + siefp.siefp_enabled = 1; + siefp.base_siefp_gpa = 
virt_to_phys(hv_cpu->synic_event_page) + >> HV_HYP_PAGE_SHIFT; + +- hv_set_siefp(siefp.as_uint64); ++ hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); + + /* Setup the shared SINT. */ +- hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); ++ shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 + ++ VMBUS_MESSAGE_SINT); + + shared_sint.vector = hv_get_vector(); + shared_sint.masked = false; + shared_sint.auto_eoi = hv_recommend_using_aeoi(); +- hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); ++ hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, ++ shared_sint.as_uint64); + + /* Enable the global synic bit */ +- hv_get_synic_state(sctrl.as_uint64); ++ sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL); + sctrl.enable = 1; + +- hv_set_synic_state(sctrl.as_uint64); ++ hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); + } + + int hv_synic_init(unsigned int cpu) +@@ -247,32 +249,35 @@ void hv_synic_disable_regs(unsigned int cpu) + union hv_synic_siefp siefp; + union hv_synic_scontrol sctrl; + +- hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); ++ shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 + ++ VMBUS_MESSAGE_SINT); + + shared_sint.masked = 1; + + /* Need to correctly cleanup in the case of SMP!!! 
*/ + /* Disable the interrupt */ +- hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); ++ hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, ++ shared_sint.as_uint64); + +- hv_get_simp(simp.as_uint64); ++ simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP); + simp.simp_enabled = 0; + simp.base_simp_gpa = 0; + +- hv_set_simp(simp.as_uint64); ++ hv_set_register(HV_REGISTER_SIMP, simp.as_uint64); + +- hv_get_siefp(siefp.as_uint64); ++ siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); + siefp.siefp_enabled = 0; + siefp.base_siefp_gpa = 0; + +- hv_set_siefp(siefp.as_uint64); ++ hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); + + /* Disable the global synic bit */ +- hv_get_synic_state(sctrl.as_uint64); ++ sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL); + sctrl.enable = 0; +- hv_set_synic_state(sctrl.as_uint64); ++ hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); + } + ++ + int hv_synic_cleanup(unsigned int cpu) + { + struct vmbus_channel *channel, *sc; +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index 7551b5c11a98..a5f55839dfaa 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -1505,7 +1505,7 @@ static int vmbus_bus_init(void) + * Register for panic kmsg callback only if the right + * capability is supported by the hypervisor. 
+ */ +- hv_get_crash_ctl(hyperv_crash_ctl); ++ hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL); + if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) + hv_kmsg_dump_register(); + +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +index 2c7500e4810f..41c65e434a0d 100644 +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -135,7 +135,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) + * possibly deliver another msg from the + * hypervisor + */ +- hv_signal_eom(); ++ hv_set_register(HV_REGISTER_EOM, 0); + } + } + +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Tue, 2 Mar 2021 13:38:17 -0800 +Subject: [PATCH 43/53] Drivers: hv: vmbus: Handle auto EOI quirk inline + +On x86/x64, Hyper-V provides a flag to indicate auto EOI functionality, +but it doesn't on ARM64. Handle this quirk inline instead of calling +into code under arch/x86 (and coming, under arch/arm64). + +No functional change. 
+ +Signed-off-by: Michael Kelley +Reviewed-by: Boqun Feng +Link: https://lore.kernel.org/r/1614721102-2241-6-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 946f4b8680b8ad177f6489e023a1d95e82d502e2) +--- + arch/x86/include/asm/mshyperv.h | 3 --- + drivers/hv/hv.c | 12 +++++++++++- + 2 files changed, 11 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index ec7928d0120b..93b3a4fbec52 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -28,9 +28,6 @@ static inline u64 hv_get_register(unsigned int reg) + return value; + } + +-#define hv_recommend_using_aeoi() \ +- (!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)) +- + #define hv_set_clocksource_vdso(val) \ + ((val).vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK) + #define hv_enable_vdso_clocksource() \ +diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c +index 2fb794d2435d..08ff1d19824e 100644 +--- a/drivers/hv/hv.c ++++ b/drivers/hv/hv.c +@@ -219,7 +219,17 @@ void hv_synic_enable_regs(unsigned int cpu) + + shared_sint.vector = hv_get_vector(); + shared_sint.masked = false; +- shared_sint.auto_eoi = hv_recommend_using_aeoi(); ++ ++ /* ++ * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64), ++ * it doesn't provide a recommendation flag and AEOI must be disabled. 
++ */ ++#ifdef HV_DEPRECATING_AEOI_RECOMMENDED ++ shared_sint.auto_eoi = ++ !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED); ++#else ++ shared_sint.auto_eoi = 0; ++#endif + hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, + shared_sint.as_uint64); + +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Tue, 2 Mar 2021 13:38:19 -0800 +Subject: [PATCH 44/53] clocksource/drivers/hyper-v: Handle vDSO differences + inline + +While the driver for the Hyper-V Reference TSC and STIMERs is architecture +neutral, vDSO is implemented for x86/x64, but not for ARM64. Current code +calls into utility functions under arch/x86 (and coming, under arch/arm64) +to handle the difference. + +Change this approach to handle the difference inline based on whether +VDSO_CLOCK_MODE_HVCLOCK is present. The new approach removes code under +arch/* since the difference is tied more to the specifics of the Linux +implementation than to the architecture. + +No functional change. 
+ +Signed-off-by: Michael Kelley +Reviewed-by: Boqun Feng +Acked-by: Daniel Lezcano +Link: https://lore.kernel.org/r/1614721102-2241-8-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit e4ab4658f1cff14c82202132f7af2cb5c2741469) +--- + arch/x86/include/asm/mshyperv.h | 4 ---- + drivers/clocksource/hyperv_timer.c | 10 ++++++++-- + 2 files changed, 8 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index 93b3a4fbec52..1a58715a8399 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -28,10 +28,6 @@ static inline u64 hv_get_register(unsigned int reg) + return value; + } + +-#define hv_set_clocksource_vdso(val) \ +- ((val).vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK) +-#define hv_enable_vdso_clocksource() \ +- vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); + #define hv_get_raw_timer() rdtsc_ordered() + #define hv_get_vector() HYPERVISOR_CALLBACK_VECTOR + +diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c +index bbda003f8a1c..7a9030ca68c4 100644 +--- a/drivers/clocksource/hyperv_timer.c ++++ b/drivers/clocksource/hyperv_timer.c +@@ -362,11 +362,13 @@ static void resume_hv_clock_tsc(struct clocksource *arg) + hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); + } + ++#ifdef VDSO_CLOCKMODE_HVCLOCK + static int hv_cs_enable(struct clocksource *cs) + { +- hv_enable_vdso_clocksource(); ++ vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); + return 0; + } ++#endif + + static struct clocksource hyperv_cs_tsc = { + .name = "hyperv_clocksource_tsc_page", +@@ -376,7 +378,12 @@ static struct clocksource hyperv_cs_tsc = { + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .suspend= suspend_hv_clock_tsc, + .resume = resume_hv_clock_tsc, ++#ifdef VDSO_CLOCKMODE_HVCLOCK + .enable = hv_cs_enable, ++ .vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK, ++#else ++ .vdso_clock_mode = VDSO_CLOCKMODE_NONE, ++#endif + }; + + static u64 notrace 
read_hv_clock_msr(void) +@@ -469,7 +476,6 @@ static bool __init hv_init_tsc_clocksource(void) + tsc_msr = tsc_msr | 0x1 | (u64)phys_addr; + hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); + +- hv_set_clocksource_vdso(hyperv_cs_tsc); + clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); + + hv_sched_clock_offset = hv_read_reference_counter(); +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 13 May 2021 09:32:46 +0200 +Subject: [PATCH 45/53] clocksource/drivers/hyper-v: Re-enable + VDSO_CLOCKMODE_HVCLOCK on X86 + +Mohammed reports (https://bugzilla.kernel.org/show_bug.cgi?id=213029) +the commit e4ab4658f1cf ("clocksource/drivers/hyper-v: Handle vDSO +differences inline") broke vDSO on x86. The problem appears to be that +VDSO_CLOCKMODE_HVCLOCK is an enum value in 'enum vdso_clock_mode' and +'#ifdef VDSO_CLOCKMODE_HVCLOCK' branch evaluates to false (it is not +a define). + +Use a dedicated HAVE_VDSO_CLOCKMODE_HVCLOCK define instead. 
+ +Fixes: e4ab4658f1cf ("clocksource/drivers/hyper-v: Handle vDSO differences inline") +Reported-by: Mohammed Gamal +Suggested-by: Thomas Gleixner +Signed-off-by: Vitaly Kuznetsov +Signed-off-by: Thomas Gleixner +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20210513073246.1715070-1-vkuznets@redhat.com + +(cherry picked from commit 3486d2c9be652a31033363bdd50391b0c8a8fe21) +--- + arch/x86/include/asm/vdso/clocksource.h | 2 ++ + drivers/clocksource/hyperv_timer.c | 4 ++-- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h +index 119ac8612d89..136e5e57cfe1 100644 +--- a/arch/x86/include/asm/vdso/clocksource.h ++++ b/arch/x86/include/asm/vdso/clocksource.h +@@ -7,4 +7,6 @@ + VDSO_CLOCKMODE_PVCLOCK, \ + VDSO_CLOCKMODE_HVCLOCK + ++#define HAVE_VDSO_CLOCKMODE_HVCLOCK ++ + #endif /* __ASM_VDSO_CLOCKSOURCE_H */ +diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c +index 7a9030ca68c4..1259402569bb 100644 +--- a/drivers/clocksource/hyperv_timer.c ++++ b/drivers/clocksource/hyperv_timer.c +@@ -362,7 +362,7 @@ static void resume_hv_clock_tsc(struct clocksource *arg) + hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); + } + +-#ifdef VDSO_CLOCKMODE_HVCLOCK ++#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK + static int hv_cs_enable(struct clocksource *cs) + { + vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); +@@ -378,7 +378,7 @@ static struct clocksource hyperv_cs_tsc = { + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .suspend= suspend_hv_clock_tsc, + .resume = resume_hv_clock_tsc, +-#ifdef VDSO_CLOCKMODE_HVCLOCK ++#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK + .enable = hv_cs_enable, + .vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK, + #else +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Tue, 2 Mar 2021 13:38:18 -0800 +Subject: [PATCH 46/53] Drivers: hv: vmbus: Move handling of VMbus interrupts + +VMbus 
interrupts are most naturally modelled as per-cpu IRQs. But +because x86/x64 doesn't have per-cpu IRQs, the core VMbus interrupt +handling machinery is done in code under arch/x86 and Linux IRQs are +not used. Adding support for ARM64 means adding equivalent code +using per-cpu IRQs under arch/arm64. + +A better model is to treat per-cpu IRQs as the normal path (which it is +for modern architectures), and the x86/x64 path as the exception. Do this +by incorporating standard Linux per-cpu IRQ allocation into the main VMbus +driver, and bypassing it in the x86/x64 exception case. For x86/x64, +special case code is retained under arch/x86, but no VMbus interrupt +handling code is needed under arch/arm64. + +No functional change. + +Signed-off-by: Michael Kelley +Reviewed-by: Boqun Feng +Link: https://lore.kernel.org/r/1614721102-2241-7-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit d608715d4771cf2d63de07a5d7b026b6f52a70a5) +--- + arch/x86/include/asm/mshyperv.h | 1 - + arch/x86/kernel/cpu/mshyperv.c | 13 +++---- + drivers/hv/hv.c | 8 ++++- + drivers/hv/vmbus_drv.c | 63 +++++++++++++++++++++++++++++---- + include/asm-generic/mshyperv.h | 7 ++-- + 5 files changed, 70 insertions(+), 22 deletions(-) + +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index 1a58715a8399..3004d8b2821e 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -29,7 +29,6 @@ static inline u64 hv_get_register(unsigned int reg) + } + + #define hv_get_raw_timer() rdtsc_ordered() +-#define hv_get_vector() HYPERVISOR_CALLBACK_VECTOR + + void hyperv_vector_handler(struct pt_regs *regs); + +diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c +index 16a5a901cde3..76c904d2f2d6 100644 +--- a/arch/x86/kernel/cpu/mshyperv.c ++++ b/arch/x86/kernel/cpu/mshyperv.c +@@ -60,23 +60,18 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback) + set_irq_regs(old_regs); + } + +-int 
hv_setup_vmbus_irq(int irq, void (*handler)(void)) ++void hv_setup_vmbus_handler(void (*handler)(void)) + { +- /* +- * The 'irq' argument is ignored on x86/x64 because a hard-coded +- * interrupt vector is used for Hyper-V interrupts. +- */ + vmbus_handler = handler; +- return 0; + } ++EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler); + +-void hv_remove_vmbus_irq(void) ++void hv_remove_vmbus_handler(void) + { + /* We have no way to deallocate the interrupt gate */ + vmbus_handler = NULL; + } +-EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq); +-EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); ++EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler); + + /* + * Routines to do per-architecture handling of stimer0 +diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c +index 08ff1d19824e..147abe31b540 100644 +--- a/drivers/hv/hv.c ++++ b/drivers/hv/hv.c +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + #include + #include "hyperv_vmbus.h" +@@ -214,10 +215,12 @@ void hv_synic_enable_regs(unsigned int cpu) + hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); + + /* Setup the shared SINT. 
*/ ++ if (vmbus_irq != -1) ++ enable_percpu_irq(vmbus_irq, 0); + shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 + + VMBUS_MESSAGE_SINT); + +- shared_sint.vector = hv_get_vector(); ++ shared_sint.vector = vmbus_interrupt; + shared_sint.masked = false; + + /* +@@ -285,6 +288,9 @@ void hv_synic_disable_regs(unsigned int cpu) + sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL); + sctrl.enable = 0; + hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); ++ ++ if (vmbus_irq != -1) ++ disable_percpu_irq(vmbus_irq); + } + + +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index a5f55839dfaa..8efac158ea61 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -48,8 +48,10 @@ static int hyperv_cpuhp_online; + + static void *hv_panic_page; + ++static long __percpu *vmbus_evt; ++ + /* Values parsed from ACPI DSDT */ +-static int vmbus_irq; ++int vmbus_irq; + int vmbus_interrupt; + + /* +@@ -1351,7 +1353,13 @@ static void vmbus_isr(void) + tasklet_schedule(&hv_cpu->msg_dpc); + } + +- add_interrupt_randomness(hv_get_vector(), 0); ++ add_interrupt_randomness(vmbus_interrupt, 0); ++} ++ ++static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id) ++{ ++ vmbus_isr(); ++ return IRQ_HANDLED; + } + + /* +@@ -1466,9 +1474,28 @@ static int vmbus_bus_init(void) + if (ret) + return ret; + +- ret = hv_setup_vmbus_irq(vmbus_irq, vmbus_isr); +- if (ret) +- goto err_setup; ++ /* ++ * VMbus interrupts are best modeled as per-cpu interrupts. If ++ * on an architecture with support for per-cpu IRQs (e.g. ARM64), ++ * allocate a per-cpu IRQ using standard Linux kernel functionality. ++ * If not on such an architecture (e.g., x86/x64), then rely on ++ * code in the arch-specific portion of the code tree to connect ++ * the VMbus interrupt handler. 
++ */ ++ ++ if (vmbus_irq == -1) { ++ hv_setup_vmbus_handler(vmbus_isr); ++ } else { ++ vmbus_evt = alloc_percpu(long); ++ ret = request_percpu_irq(vmbus_irq, vmbus_percpu_isr, ++ "Hyper-V VMbus", vmbus_evt); ++ if (ret) { ++ pr_err("Can't request Hyper-V VMbus IRQ %d, Err %d", ++ vmbus_irq, ret); ++ free_percpu(vmbus_evt); ++ goto err_setup; ++ } ++ } + + ret = hv_synic_alloc(); + if (ret) +@@ -1529,7 +1556,12 @@ static int vmbus_bus_init(void) + err_cpuhp: + hv_synic_free(); + err_alloc: +- hv_remove_vmbus_irq(); ++ if (vmbus_irq == -1) { ++ hv_remove_vmbus_handler(); ++ } else { ++ free_percpu_irq(vmbus_irq, vmbus_evt); ++ free_percpu(vmbus_evt); ++ } + err_setup: + bus_unregister(&hv_bus); + unregister_sysctl_table(hv_ctl_table_hdr); +@@ -2644,6 +2676,18 @@ static int __init hv_acpi_init(void) + ret = -ETIMEDOUT; + goto cleanup; + } ++ ++ /* ++ * If we're on an architecture with a hardcoded hypervisor ++ * vector (i.e. x86/x64), override the VMbus interrupt found ++ * in the ACPI tables. Ensure vmbus_irq is not set since the ++ * normal Linux IRQ mechanism is not used in this case. 
++ */ ++#ifdef HYPERVISOR_CALLBACK_VECTOR ++ vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR; ++ vmbus_irq = -1; ++#endif ++ + hv_debug_init(); + + ret = vmbus_bus_init(); +@@ -2674,7 +2718,12 @@ static void __exit vmbus_exit(void) + vmbus_connection.conn_state = DISCONNECTED; + hv_stimer_global_cleanup(); + vmbus_disconnect(); +- hv_remove_vmbus_irq(); ++ if (vmbus_irq == -1) { ++ hv_remove_vmbus_handler(); ++ } else { ++ free_percpu_irq(vmbus_irq, vmbus_evt); ++ free_percpu(vmbus_evt); ++ } + for_each_online_cpu(cpu) { + struct hv_per_cpu_context *hv_cpu + = per_cpu_ptr(hv_context.cpu_context, cpu); +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +index 41c65e434a0d..f9cde867c892 100644 +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -139,10 +139,8 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) + } + } + +-int hv_setup_vmbus_irq(int irq, void (*handler)(void)); +-void hv_remove_vmbus_irq(void); +-void hv_enable_vmbus_irq(void); +-void hv_disable_vmbus_irq(void); ++void hv_setup_vmbus_handler(void (*handler)(void)); ++void hv_remove_vmbus_handler(void); + + void hv_setup_kexec_handler(void (*handler)(void)); + void hv_remove_kexec_handler(void); +@@ -150,6 +148,7 @@ void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); + void hv_remove_crash_handler(void); + + extern int vmbus_interrupt; ++extern int vmbus_irq; + + #if IS_ENABLED(CONFIG_HYPERV) + /* +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Tue, 2 Mar 2021 13:38:22 -0800 +Subject: [PATCH 47/53] clocksource/drivers/hyper-v: Move handling of STIMER0 + interrupts + +STIMER0 interrupts are most naturally modeled as per-cpu IRQs. But +because x86/x64 doesn't have per-cpu IRQs, the core STIMER0 interrupt +handling machinery is done in code under arch/x86 and Linux IRQs are +not used. 
Adding support for ARM64 means adding equivalent code +using per-cpu IRQs under arch/arm64. + +A better model is to treat per-cpu IRQs as the normal path (which it is +for modern architectures), and the x86/x64 path as the exception. Do this +by incorporating standard Linux per-cpu IRQ allocation into the main +STIMER0 driver code, and bypass it in the x86/x64 exception case. For +x86/x64, special case code is retained under arch/x86, but no STIMER0 +interrupt handling code is needed under arch/arm64. + +No functional change. + +Signed-off-by: Michael Kelley +Acked-by: Daniel Lezcano +Link: https://lore.kernel.org/r/1614721102-2241-11-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit ec866be6ec547c9e1cc4451f04250e08b5fe67c7) +--- + arch/x86/hyperv/hv_init.c | 2 +- + arch/x86/include/asm/mshyperv.h | 4 - + arch/x86/kernel/cpu/mshyperv.c | 10 +- + drivers/clocksource/hyperv_timer.c | 168 ++++++++++++++++++++--------- + include/asm-generic/mshyperv.h | 5 - + include/clocksource/hyperv_timer.h | 3 +- + 6 files changed, 120 insertions(+), 72 deletions(-) + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index 48a5c45c09cb..64b6ebbb1a06 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -327,7 +327,7 @@ static void __init hv_stimer_setup_percpu_clockev(void) + * Ignore any errors in setting up stimer clockevents + * as we can run with the LAPIC timer as a fallback. 
+ */ +- (void)hv_stimer_alloc(); ++ (void)hv_stimer_alloc(false); + + /* + * Still register the LAPIC timer, because the direct-mode STIMER is +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index 3004d8b2821e..67ff0d637e55 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -32,10 +32,6 @@ static inline u64 hv_get_register(unsigned int reg) + + void hyperv_vector_handler(struct pt_regs *regs); + +-static inline void hv_enable_stimer0_percpu_irq(int irq) {} +-static inline void hv_disable_stimer0_percpu_irq(int irq) {} +- +- + #if IS_ENABLED(CONFIG_HYPERV) + extern int hyperv_init_cpuhp; + +diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c +index 76c904d2f2d6..9ddc74e475ca 100644 +--- a/arch/x86/kernel/cpu/mshyperv.c ++++ b/arch/x86/kernel/cpu/mshyperv.c +@@ -90,21 +90,17 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) + set_irq_regs(old_regs); + } + +-int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)) ++/* For x86/x64, override weak placeholders in hyperv_timer.c */ ++void hv_setup_stimer0_handler(void (*handler)(void)) + { +- *vector = HYPERV_STIMER0_VECTOR; +- *irq = -1; /* Unused on x86/x64 */ + hv_stimer0_handler = handler; +- return 0; + } +-EXPORT_SYMBOL_GPL(hv_setup_stimer0_irq); + +-void hv_remove_stimer0_irq(int irq) ++void hv_remove_stimer0_handler(void) + { + /* We have no way to deallocate the interrupt gate */ + hv_stimer0_handler = NULL; + } +-EXPORT_SYMBOL_GPL(hv_remove_stimer0_irq); + + void hv_setup_kexec_handler(void (*handler)(void)) + { +diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c +index 1259402569bb..150a3f7174a9 100644 +--- a/drivers/clocksource/hyperv_timer.c ++++ b/drivers/clocksource/hyperv_timer.c +@@ -18,6 +18,9 @@ + #include + #include + #include ++#include ++#include ++#include + #include + #include + #include +@@ -43,14 +46,13 @@ static u64 hv_sched_clock_offset __ro_after_init; 
+ */ + static bool direct_mode_enabled; + +-static int stimer0_irq; +-static int stimer0_vector; ++static int stimer0_irq = -1; + static int stimer0_message_sint; ++static DEFINE_PER_CPU(long, stimer0_evt); + + /* +- * ISR for when stimer0 is operating in Direct Mode. Direct Mode +- * does not use VMbus or any VMbus messages, so process here and not +- * in the VMbus driver code. ++ * Common code for stimer0 interrupts coming via Direct Mode or ++ * as a VMbus message. + */ + void hv_stimer0_isr(void) + { +@@ -61,6 +63,16 @@ void hv_stimer0_isr(void) + } + EXPORT_SYMBOL_GPL(hv_stimer0_isr); + ++/* ++ * stimer0 interrupt handler for architectures that support ++ * per-cpu interrupts, which also implies Direct Mode. ++ */ ++static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id) ++{ ++ hv_stimer0_isr(); ++ return IRQ_HANDLED; ++} ++ + static int hv_ce_set_next_event(unsigned long delta, + struct clock_event_device *evt) + { +@@ -76,8 +88,8 @@ static int hv_ce_shutdown(struct clock_event_device *evt) + { + hv_set_register(HV_REGISTER_STIMER0_COUNT, 0); + hv_set_register(HV_REGISTER_STIMER0_CONFIG, 0); +- if (direct_mode_enabled) +- hv_disable_stimer0_percpu_irq(stimer0_irq); ++ if (direct_mode_enabled && stimer0_irq >= 0) ++ disable_percpu_irq(stimer0_irq); + + return 0; + } +@@ -95,8 +107,9 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt) + * on the specified hardware vector/IRQ. 
+ */ + timer_cfg.direct_mode = 1; +- timer_cfg.apic_vector = stimer0_vector; +- hv_enable_stimer0_percpu_irq(stimer0_irq); ++ timer_cfg.apic_vector = HYPERV_STIMER0_VECTOR; ++ if (stimer0_irq >= 0) ++ enable_percpu_irq(stimer0_irq, IRQ_TYPE_NONE); + } else { + /* + * When it expires, the timer will generate a VMbus message, +@@ -169,10 +182,58 @@ int hv_stimer_cleanup(unsigned int cpu) + } + EXPORT_SYMBOL_GPL(hv_stimer_cleanup); + ++/* ++ * These placeholders are overridden by arch specific code on ++ * architectures that need special setup of the stimer0 IRQ because ++ * they don't support per-cpu IRQs (such as x86/x64). ++ */ ++void __weak hv_setup_stimer0_handler(void (*handler)(void)) ++{ ++}; ++ ++void __weak hv_remove_stimer0_handler(void) ++{ ++}; ++ ++/* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */ ++static int hv_setup_stimer0_irq(void) ++{ ++ int ret; ++ ++ ret = acpi_register_gsi(NULL, HYPERV_STIMER0_VECTOR, ++ ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH); ++ if (ret < 0) { ++ pr_err("Can't register Hyper-V stimer0 GSI. Error %d", ret); ++ return ret; ++ } ++ stimer0_irq = ret; ++ ++ ret = request_percpu_irq(stimer0_irq, hv_stimer0_percpu_isr, ++ "Hyper-V stimer0", &stimer0_evt); ++ if (ret) { ++ pr_err("Can't request Hyper-V stimer0 IRQ %d. 
Error %d", ++ stimer0_irq, ret); ++ acpi_unregister_gsi(stimer0_irq); ++ stimer0_irq = -1; ++ } ++ return ret; ++} ++ ++static void hv_remove_stimer0_irq(void) ++{ ++ if (stimer0_irq == -1) { ++ hv_remove_stimer0_handler(); ++ } else { ++ free_percpu_irq(stimer0_irq, &stimer0_evt); ++ acpi_unregister_gsi(stimer0_irq); ++ stimer0_irq = -1; ++ } ++} ++ + /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */ +-int hv_stimer_alloc(void) ++int hv_stimer_alloc(bool have_percpu_irqs) + { +- int ret = 0; ++ int ret; + + /* + * Synthetic timers are always available except on old versions of +@@ -188,29 +249,37 @@ int hv_stimer_alloc(void) + + direct_mode_enabled = ms_hyperv.misc_features & + HV_STIMER_DIRECT_MODE_AVAILABLE; +- if (direct_mode_enabled) { +- ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector, +- hv_stimer0_isr); ++ ++ /* ++ * If Direct Mode isn't enabled, the remainder of the initialization ++ * is done later by hv_stimer_legacy_init() ++ */ ++ if (!direct_mode_enabled) ++ return 0; ++ ++ if (have_percpu_irqs) { ++ ret = hv_setup_stimer0_irq(); + if (ret) +- goto free_percpu; ++ goto free_clock_event; ++ } else { ++ hv_setup_stimer0_handler(hv_stimer0_isr); ++ } + +- /* +- * Since we are in Direct Mode, stimer initialization +- * can be done now with a CPUHP value in the same range +- * as other clockevent devices. +- */ +- ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING, +- "clockevents/hyperv/stimer:starting", +- hv_stimer_init, hv_stimer_cleanup); +- if (ret < 0) +- goto free_stimer0_irq; ++ /* ++ * Since we are in Direct Mode, stimer initialization ++ * can be done now with a CPUHP value in the same range ++ * as other clockevent devices. 
++ */ ++ ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING, ++ "clockevents/hyperv/stimer:starting", ++ hv_stimer_init, hv_stimer_cleanup); ++ if (ret < 0) { ++ hv_remove_stimer0_irq(); ++ goto free_clock_event; + } + return ret; + +-free_stimer0_irq: +- hv_remove_stimer0_irq(stimer0_irq); +- stimer0_irq = 0; +-free_percpu: ++free_clock_event: + free_percpu(hv_clock_event); + hv_clock_event = NULL; + return ret; +@@ -254,23 +323,6 @@ void hv_stimer_legacy_cleanup(unsigned int cpu) + } + EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup); + +- +-/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */ +-void hv_stimer_free(void) +-{ +- if (!hv_clock_event) +- return; +- +- if (direct_mode_enabled) { +- cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING); +- hv_remove_stimer0_irq(stimer0_irq); +- stimer0_irq = 0; +- } +- free_percpu(hv_clock_event); +- hv_clock_event = NULL; +-} +-EXPORT_SYMBOL_GPL(hv_stimer_free); +- + /* + * Do a global cleanup of clockevents for the cases of kexec and + * vmbus exit +@@ -287,12 +339,17 @@ void hv_stimer_global_cleanup(void) + hv_stimer_legacy_cleanup(cpu); + } + +- /* +- * If Direct Mode is enabled, the cpuhp teardown callback +- * (hv_stimer_cleanup) will be run on all CPUs to stop the +- * stimers. +- */ +- hv_stimer_free(); ++ if (!hv_clock_event) ++ return; ++ ++ if (direct_mode_enabled) { ++ cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING); ++ hv_remove_stimer0_irq(); ++ stimer0_irq = -1; ++ } ++ free_percpu(hv_clock_event); ++ hv_clock_event = NULL; ++ + } + EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); + +@@ -457,9 +514,14 @@ static bool __init hv_init_tsc_clocksource(void) + * Hyper-V Reference TSC rating, causing the generic TSC to be used. + * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference + * TSC will be preferred over the virtualized ARM64 arch counter. 
++ * While the Hyper-V MSR clocksource won't be used since the ++ * Reference TSC clocksource is present, change its rating as ++ * well for consistency. + */ +- if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) ++ if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { + hyperv_cs_tsc.rating = 250; ++ hyperv_cs_msr.rating = 250; ++ } + + hv_read_reference_counter = read_hv_clock_tsc; + phys_addr = virt_to_phys(hv_get_tsc_page()); +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +index f9cde867c892..e13f137a0ba8 100644 +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -231,9 +231,4 @@ static inline bool hv_is_hibernation_supported(void) { return false; } + static inline void hyperv_cleanup(void) {} + #endif /* CONFIG_HYPERV */ + +-#if IS_ENABLED(CONFIG_HYPERV) +-extern int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)); +-extern void hv_remove_stimer0_irq(int irq); +-#endif +- + #endif +diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h +index 34eef083c988..b6774aa5a4b8 100644 +--- a/include/clocksource/hyperv_timer.h ++++ b/include/clocksource/hyperv_timer.h +@@ -21,8 +21,7 @@ + #define HV_MIN_DELTA_TICKS 1 + + /* Routines called by the VMbus driver */ +-extern int hv_stimer_alloc(void); +-extern void hv_stimer_free(void); ++extern int hv_stimer_alloc(bool have_percpu_irqs); + extern int hv_stimer_cleanup(unsigned int cpu); + extern void hv_stimer_legacy_init(unsigned int cpu, int sint); + extern void hv_stimer_legacy_cleanup(unsigned int cpu); +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 14 Jul 2021 11:34:45 -0700 +Subject: [PATCH 48/53] Drivers: hv: Make portions of Hyper-V init code be arch + neutral + +The code to allocate and initialize the hv_vp_index array is +architecture neutral. 
Similarly, the code to allocate and +populate the hypercall input and output arg pages is architecture +neutral. Move both sets of code out from arch/x86 and into +utility functions in drivers/hv/hv_common.c that can be shared +by Hyper-V initialization on ARM64. + +No functional changes. However, the allocation of the hypercall +input and output arg pages is done differently so that the +size is always the Hyper-V page size, even if not the same as +the guest page size (such as with ARM64's 64K page size). + +Signed-off-by: Michael Kelley +Link: https://lore.kernel.org/r/1626287687-2045-2-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit afca4d95dd7d7936d46a0ff02169cc40f534a6a3) +--- + arch/x86/hyperv/hv_init.c | 91 +++------------------ + arch/x86/include/asm/mshyperv.h | 4 - + arch/x86/kernel/cpu/mshyperv.c | 3 - + drivers/hv/hv_common.c | 138 ++++++++++++++++++++++++++++++++ + include/asm-generic/mshyperv.h | 10 +++ + 5 files changed, 158 insertions(+), 88 deletions(-) + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index 64b6ebbb1a06..8e4c05ba541f 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -39,48 +39,17 @@ EXPORT_SYMBOL_GPL(hv_hypercall_pg); + /* Storage to save the hypercall page temporarily for hibernation */ + static void *hv_hypercall_pg_saved; + +-u32 *hv_vp_index; +-EXPORT_SYMBOL_GPL(hv_vp_index); +- + struct hv_vp_assist_page **hv_vp_assist_page; + EXPORT_SYMBOL_GPL(hv_vp_assist_page); + +-void __percpu **hyperv_pcpu_input_arg; +-EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); +- +-void __percpu **hyperv_pcpu_output_arg; +-EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); +- +-u32 hv_max_vp_index; +-EXPORT_SYMBOL_GPL(hv_max_vp_index); +- + static int hv_cpu_init(unsigned int cpu) + { +- u64 msr_vp_index; + struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; +- void **input_arg; +- struct page *pg; +- +- /* hv_cpu_init() can be called with IRQs 
disabled from hv_resume() */ +- pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ? 1 : 0); +- if (unlikely(!pg)) +- return -ENOMEM; +- +- input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); +- *input_arg = page_address(pg); +- if (hv_root_partition) { +- void **output_arg; +- +- output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); +- *output_arg = page_address(pg + 1); +- } +- +- msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); +- +- hv_vp_index[smp_processor_id()] = msr_vp_index; ++ int ret; + +- if (msr_vp_index > hv_max_vp_index) +- hv_max_vp_index = msr_vp_index; ++ ret = hv_common_cpu_init(cpu); ++ if (ret) ++ return ret; + + if (!hv_vp_assist_page) + return 0; +@@ -198,25 +167,8 @@ static int hv_cpu_die(unsigned int cpu) + { + struct hv_reenlightenment_control re_ctrl; + unsigned int new_cpu; +- unsigned long flags; +- void **input_arg; +- void *pg; + +- local_irq_save(flags); +- input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); +- pg = *input_arg; +- *input_arg = NULL; +- +- if (hv_root_partition) { +- void **output_arg; +- +- output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); +- *output_arg = NULL; +- } +- +- local_irq_restore(flags); +- +- free_pages((unsigned long)pg, hv_root_partition ? 1 : 0); ++ hv_common_cpu_die(cpu); + + if (hv_vp_assist_page && hv_vp_assist_page[cpu]) + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); +@@ -368,7 +320,7 @@ void __init hyperv_init(void) + { + u64 guest_id, required_msrs; + union hv_x64_msr_hypercall_contents hypercall_msr; +- int cpuhp, i; ++ int cpuhp; + + if (x86_hyper_type != X86_HYPER_MS_HYPERV) + return; +@@ -380,36 +332,14 @@ void __init hyperv_init(void) + if ((ms_hyperv.features & required_msrs) != required_msrs) + return; + +- /* +- * Allocate the per-CPU state for the hypercall input arg. +- * If this allocation fails, we will not be able to setup +- * (per-CPU) hypercall input page and thus this failure is +- * fatal on Hyper-V. 
+- */ +- hyperv_pcpu_input_arg = alloc_percpu(void *); +- +- BUG_ON(hyperv_pcpu_input_arg == NULL); +- +- /* Allocate the per-CPU state for output arg for root */ +- if (hv_root_partition) { +- hyperv_pcpu_output_arg = alloc_percpu(void *); +- BUG_ON(hyperv_pcpu_output_arg == NULL); +- } +- +- /* Allocate percpu VP index */ +- hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), +- GFP_KERNEL); +- if (!hv_vp_index) ++ if (hv_common_init()) + return; + +- for (i = 0; i < num_possible_cpus(); i++) +- hv_vp_index[i] = VP_INVAL; +- + hv_vp_assist_page = kcalloc(num_possible_cpus(), + sizeof(*hv_vp_assist_page), GFP_KERNEL); + if (!hv_vp_assist_page) { + ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; +- goto free_vp_index; ++ goto common_free; + } + + cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", +@@ -507,9 +437,8 @@ void __init hyperv_init(void) + free_vp_assist_page: + kfree(hv_vp_assist_page); + hv_vp_assist_page = NULL; +-free_vp_index: +- kfree(hv_vp_index); +- hv_vp_index = NULL; ++common_free: ++ hv_common_free(); + } + + /* +diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h +index 67ff0d637e55..adccbc209169 100644 +--- a/arch/x86/include/asm/mshyperv.h ++++ b/arch/x86/include/asm/mshyperv.h +@@ -36,8 +36,6 @@ void hyperv_vector_handler(struct pt_regs *regs); + extern int hyperv_init_cpuhp; + + extern void *hv_hypercall_pg; +-extern void __percpu **hyperv_pcpu_input_arg; +-extern void __percpu **hyperv_pcpu_output_arg; + + extern u64 hv_current_partition_id; + +@@ -170,8 +168,6 @@ int hyperv_fill_flush_guest_mapping_list( + struct hv_guest_mapping_flush_list *flush, + u64 start_gfn, u64 end_gfn); + +-extern bool hv_root_partition; +- + #ifdef CONFIG_X86_64 + void hv_apic_init(void); + void __init hv_init_spinlocks(void); +diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c +index 9ddc74e475ca..79223a0c863e 100644 +--- a/arch/x86/kernel/cpu/mshyperv.c ++++ 
b/arch/x86/kernel/cpu/mshyperv.c +@@ -35,10 +35,7 @@ + + /* Is Linux running as the root partition? */ + bool hv_root_partition; +-EXPORT_SYMBOL_GPL(hv_root_partition); +- + struct ms_hyperv_info ms_hyperv; +-EXPORT_SYMBOL_GPL(ms_hyperv); + + #if IS_ENABLED(CONFIG_HYPERV) + static void (*vmbus_handler)(void); +diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c +index f0053c786891..caba4f728987 100644 +--- a/drivers/hv/hv_common.c ++++ b/drivers/hv/hv_common.c +@@ -15,9 +15,147 @@ + #include + #include + #include ++#include ++#include + #include + #include + ++/* ++ * hv_root_partition and ms_hyperv are defined here with other Hyper-V ++ * specific globals so they are shared across all architectures and are ++ * built only when CONFIG_HYPERV is defined. But on x86, ++ * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not ++ * defined, and it uses these two variables. So mark them as __weak ++ * here, allowing for an overriding definition in the module containing ++ * ms_hyperv_init_platform(). ++ */ ++bool __weak hv_root_partition; ++EXPORT_SYMBOL_GPL(hv_root_partition); ++ ++struct ms_hyperv_info __weak ms_hyperv; ++EXPORT_SYMBOL_GPL(ms_hyperv); ++ ++u32 *hv_vp_index; ++EXPORT_SYMBOL_GPL(hv_vp_index); ++ ++u32 hv_max_vp_index; ++EXPORT_SYMBOL_GPL(hv_max_vp_index); ++ ++void __percpu **hyperv_pcpu_input_arg; ++EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); ++ ++void __percpu **hyperv_pcpu_output_arg; ++EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); ++ ++/* ++ * Hyper-V specific initialization and shutdown code that is ++ * common across all architectures. Called from architecture ++ * specific initialization functions. 
++ */ ++ ++void __init hv_common_free(void) ++{ ++ kfree(hv_vp_index); ++ hv_vp_index = NULL; ++ ++ free_percpu(hyperv_pcpu_output_arg); ++ hyperv_pcpu_output_arg = NULL; ++ ++ free_percpu(hyperv_pcpu_input_arg); ++ hyperv_pcpu_input_arg = NULL; ++} ++ ++int __init hv_common_init(void) ++{ ++ int i; ++ ++ /* ++ * Allocate the per-CPU state for the hypercall input arg. ++ * If this allocation fails, we will not be able to setup ++ * (per-CPU) hypercall input page and thus this failure is ++ * fatal on Hyper-V. ++ */ ++ hyperv_pcpu_input_arg = alloc_percpu(void *); ++ BUG_ON(!hyperv_pcpu_input_arg); ++ ++ /* Allocate the per-CPU state for output arg for root */ ++ if (hv_root_partition) { ++ hyperv_pcpu_output_arg = alloc_percpu(void *); ++ BUG_ON(!hyperv_pcpu_output_arg); ++ } ++ ++ hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), ++ GFP_KERNEL); ++ if (!hv_vp_index) { ++ hv_common_free(); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < num_possible_cpus(); i++) ++ hv_vp_index[i] = VP_INVAL; ++ ++ return 0; ++} ++ ++/* ++ * Hyper-V specific initialization and die code for ++ * individual CPUs that is common across all architectures. ++ * Called by the CPU hotplug mechanism. ++ */ ++ ++int hv_common_cpu_init(unsigned int cpu) ++{ ++ void **inputarg, **outputarg; ++ u64 msr_vp_index; ++ gfp_t flags; ++ int pgcount = hv_root_partition ? 2 : 1; ++ ++ /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ ++ flags = irqs_disabled() ? 
GFP_ATOMIC : GFP_KERNEL; ++ ++ inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); ++ *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags); ++ if (!(*inputarg)) ++ return -ENOMEM; ++ ++ if (hv_root_partition) { ++ outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); ++ *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE; ++ } ++ ++ msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); ++ ++ hv_vp_index[cpu] = msr_vp_index; ++ ++ if (msr_vp_index > hv_max_vp_index) ++ hv_max_vp_index = msr_vp_index; ++ ++ return 0; ++} ++ ++int hv_common_cpu_die(unsigned int cpu) ++{ ++ unsigned long flags; ++ void **inputarg, **outputarg; ++ void *mem; ++ ++ local_irq_save(flags); ++ ++ inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); ++ mem = *inputarg; ++ *inputarg = NULL; ++ ++ if (hv_root_partition) { ++ outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); ++ *outputarg = NULL; ++ } ++ ++ local_irq_restore(flags); ++ ++ kfree(mem); ++ ++ return 0; ++} + + /* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */ + bool hv_query_ext_cap(u64 cap_query) +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +index e13f137a0ba8..977166ff8672 100644 +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -39,6 +39,9 @@ struct ms_hyperv_info { + }; + extern struct ms_hyperv_info ms_hyperv; + ++extern void __percpu **hyperv_pcpu_input_arg; ++extern void __percpu **hyperv_pcpu_output_arg; ++ + extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); + extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); + +@@ -150,6 +153,8 @@ void hv_remove_crash_handler(void); + extern int vmbus_interrupt; + extern int vmbus_irq; + ++extern bool hv_root_partition; ++ + #if IS_ENABLED(CONFIG_HYPERV) + /* + * Hypervisor's notion of virtual processor ID is different from +@@ -163,6 +168,11 @@ extern u32 hv_max_vp_index; + /* Sentinel value for an uninitialized entry in hv_vp_index array 
*/ + #define VP_INVAL U32_MAX + ++int __init hv_common_init(void); ++void __init hv_common_free(void); ++int hv_common_cpu_init(unsigned int cpu); ++int hv_common_cpu_die(unsigned int cpu); ++ + void *hv_alloc_hyperv_page(void); + void *hv_alloc_hyperv_zeroed_page(void); + void hv_free_hyperv_page(unsigned long addr); +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 14 Jul 2021 11:34:46 -0700 +Subject: [PATCH 49/53] Drivers: hv: Add arch independent default functions for + some Hyper-V handlers + +Architecture independent Hyper-V code calls various arch-specific handlers +when needed. To aid in supporting multiple architectures, provide weak +defaults that can be overridden by arch-specific implementations where +appropriate. But when arch-specific overrides aren't needed or haven't +been implemented yet for a particular architecture, these stubs reduce +the amount of clutter under arch/. + +No functional change. + +Signed-off-by: Michael Kelley +Link: https://lore.kernel.org/r/1626287687-2045-3-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 9d7cf2c9675838c12cd5cf5a4ebe2ba41bd78a44) +--- + arch/x86/hyperv/hv_init.c | 2 -- + arch/x86/kernel/cpu/mshyperv.c | 6 ----- + drivers/hv/hv_common.c | 49 ++++++++++++++++++++++++++++++++++ + 3 files changed, 49 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index 8e4c05ba541f..0ec2222e35fe 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -468,7 +468,6 @@ void hyperv_cleanup(void) + hypercall_msr.as_uint64 = 0; + wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); + } +-EXPORT_SYMBOL_GPL(hyperv_cleanup); + + void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) + { +@@ -542,4 +541,3 @@ bool hv_is_isolation_supported(void) + { + return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; + } 
+-EXPORT_SYMBOL_GPL(hv_is_isolation_supported); +diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c +index 79223a0c863e..617b7f35cc8f 100644 +--- a/arch/x86/kernel/cpu/mshyperv.c ++++ b/arch/x86/kernel/cpu/mshyperv.c +@@ -61,14 +61,12 @@ void hv_setup_vmbus_handler(void (*handler)(void)) + { + vmbus_handler = handler; + } +-EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler); + + void hv_remove_vmbus_handler(void) + { + /* We have no way to deallocate the interrupt gate */ + vmbus_handler = NULL; + } +-EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler); + + /* + * Routines to do per-architecture handling of stimer0 +@@ -103,25 +101,21 @@ void hv_setup_kexec_handler(void (*handler)(void)) + { + hv_kexec_handler = handler; + } +-EXPORT_SYMBOL_GPL(hv_setup_kexec_handler); + + void hv_remove_kexec_handler(void) + { + hv_kexec_handler = NULL; + } +-EXPORT_SYMBOL_GPL(hv_remove_kexec_handler); + + void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)) + { + hv_crash_handler = handler; + } +-EXPORT_SYMBOL_GPL(hv_setup_crash_handler); + + void hv_remove_crash_handler(void) + { + hv_crash_handler = NULL; + } +-EXPORT_SYMBOL_GPL(hv_remove_crash_handler); + + #ifdef CONFIG_KEXEC_CORE + static void hv_machine_shutdown(void) +diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c +index caba4f728987..cac9c6bfc653 100644 +--- a/drivers/hv/hv_common.c ++++ b/drivers/hv/hv_common.c +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -202,3 +203,51 @@ bool hv_query_ext_cap(u64 cap_query) + return hv_extended_cap & cap_query; + } + EXPORT_SYMBOL_GPL(hv_query_ext_cap); ++ ++/* These __weak functions provide default "no-op" behavior and ++ * may be overridden by architecture specific versions. Architectures ++ * for which the default "no-op" behavior is sufficient can leave ++ * them unimplemented and not be cluttered with a bunch of stub ++ * functions in arch-specific code. 
++ */ ++ ++bool __weak hv_is_isolation_supported(void) ++{ ++ return false; ++} ++EXPORT_SYMBOL_GPL(hv_is_isolation_supported); ++ ++void __weak hv_setup_vmbus_handler(void (*handler)(void)) ++{ ++} ++EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler); ++ ++void __weak hv_remove_vmbus_handler(void) ++{ ++} ++EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler); ++ ++void __weak hv_setup_kexec_handler(void (*handler)(void)) ++{ ++} ++EXPORT_SYMBOL_GPL(hv_setup_kexec_handler); ++ ++void __weak hv_remove_kexec_handler(void) ++{ ++} ++EXPORT_SYMBOL_GPL(hv_remove_kexec_handler); ++ ++void __weak hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)) ++{ ++} ++EXPORT_SYMBOL_GPL(hv_setup_crash_handler); ++ ++void __weak hv_remove_crash_handler(void) ++{ ++} ++EXPORT_SYMBOL_GPL(hv_remove_crash_handler); ++ ++void __weak hyperv_cleanup(void) ++{ ++} ++EXPORT_SYMBOL_GPL(hyperv_cleanup); +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Boqun Feng +Date: Tue, 27 Jul 2021 02:06:56 +0800 +Subject: [PATCH 50/53] PCI: hv: Set up MSI domain at bridge probing time + +Since PCI_HYPERV depends on PCI_MSI_IRQ_DOMAIN which selects +GENERIC_MSI_IRQ_DOMAIN, we can use dev_set_msi_domain() to set up the +MSI domain at probing time, and this works for both x86 and ARM64. + +Therefore use it as the preparation for ARM64 Hyper-V PCI support. + +As a result, no longer need to maintain ->fwnode in x86 specific +pci_sysdata, and make hv_pcibus_device own it instead. 
+ +Link: https://lore.kernel.org/r/20210726180657.142727-8-boqun.feng@gmail.com +Signed-off-by: Boqun Feng +Signed-off-by: Lorenzo Pieralisi +(cherry picked from commit 9e7f9178ab4943b3a7294a12bc38925c515ca3f0) +--- + drivers/pci/controller/pci-hyperv.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c +index 6992e3e89768..f6ee9f359a75 100644 +--- a/drivers/pci/controller/pci-hyperv.c ++++ b/drivers/pci/controller/pci-hyperv.c +@@ -455,6 +455,7 @@ struct hv_pcibus_device { + struct pci_config_window sysdata; + #endif + struct pci_host_bridge *bridge; ++ struct fwnode_handle *fwnode; + /* Protocol version negotiated with the host */ + enum pci_protocol_version_t protocol_version; + enum hv_pcibus_state state; +@@ -1571,7 +1572,7 @@ static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus) + hbus->msi_info.handler = handle_edge_irq; + hbus->msi_info.handler_name = "edge"; + hbus->msi_info.data = hbus; +- hbus->irq_domain = pci_msi_create_irq_domain(hbus->sysdata.fwnode, ++ hbus->irq_domain = pci_msi_create_irq_domain(hbus->fwnode, + &hbus->msi_info, + x86_vector_domain); + if (!hbus->irq_domain) { +@@ -1580,6 +1581,8 @@ static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus) + return -ENODEV; + } + ++ dev_set_msi_domain(&hbus->bridge->dev, hbus->irq_domain); ++ + return 0; + } + +@@ -3129,9 +3132,9 @@ static int hv_pci_probe(struct hv_device *hdev, + goto unmap; + } + +- hbus->sysdata.fwnode = irq_domain_alloc_named_fwnode(name); ++ hbus->fwnode = irq_domain_alloc_named_fwnode(name); + kfree(name); +- if (!hbus->sysdata.fwnode) { ++ if (!hbus->fwnode) { + ret = -ENOMEM; + goto unmap; + } +@@ -3209,7 +3212,7 @@ static int hv_pci_probe(struct hv_device *hdev, + free_irq_domain: + irq_domain_remove(hbus->irq_domain); + free_fwnode: +- irq_domain_free_fwnode(hbus->sysdata.fwnode); ++ irq_domain_free_fwnode(hbus->fwnode); + unmap: + 
iounmap(hbus->cfg_addr); + free_config: +@@ -3332,7 +3335,7 @@ static int hv_pci_remove(struct hv_device *hdev) + hv_free_config_window(hbus); + hv_pci_free_bridge_windows(hbus); + irq_domain_remove(hbus->irq_domain); +- irq_domain_free_fwnode(hbus->sysdata.fwnode); ++ irq_domain_free_fwnode(hbus->fwnode); + + hv_put_dom_num(hbus->bridge->domain_nr); + +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 14 Jul 2021 11:34:47 -0700 +Subject: [PATCH 51/53] Drivers: hv: Move Hyper-V misc functionality to + arch-neutral code + +The check for whether hibernation is possible, and the enabling of +Hyper-V panic notification during kexec, are both architecture neutral. +Move the code from under arch/x86 and into drivers/hv/hv_common.c where +it can also be used for ARM64. + +No functional change. + +Signed-off-by: Michael Kelley +Link: https://lore.kernel.org/r/1626287687-2045-4-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 6dc77fa5ac2cf26f846a51492dbe42526e26d0f2) +--- + arch/x86/hyperv/hv_init.c | 8 +------- + arch/x86/kernel/cpu/mshyperv.c | 10 ---------- + drivers/hv/hv_common.c | 18 ++++++++++++++++++ + 3 files changed, 19 insertions(+), 17 deletions(-) + +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +index 0ec2222e35fe..5f2dae6c33d5 100644 +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -7,10 +7,10 @@ + * Author : K. Y. 
Srinivasan + */ + +-#include + #include + #include + #include ++#include + #include + #include + #include +@@ -523,12 +523,6 @@ bool hv_is_hyperv_initialized(void) + } + EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); + +-bool hv_is_hibernation_supported(void) +-{ +- return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); +-} +-EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); +- + enum hv_isolation_type hv_get_isolation_type(void) + { + if (!(ms_hyperv.priv_high & HV_ISOLATION)) +diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c +index 617b7f35cc8f..c91c4c1aa2c8 100644 +--- a/arch/x86/kernel/cpu/mshyperv.c ++++ b/arch/x86/kernel/cpu/mshyperv.c +@@ -321,16 +321,6 @@ static void __init ms_hyperv_init_platform(void) + cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); + } + +- /* +- * Hyper-V expects to get crash register data or kmsg when +- * crash enlightment is available and system crashes. Set +- * crash_kexec_post_notifiers to be true to make sure that +- * calling crash enlightment interface before running kdump +- * kernel. +- */ +- if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) +- crash_kexec_post_notifiers = true; +- + #ifdef CONFIG_X86_LOCAL_APIC + if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && + ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { +diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c +index cac9c6bfc653..4d7fcff39b8c 100644 +--- a/drivers/hv/hv_common.c ++++ b/drivers/hv/hv_common.c +@@ -13,9 +13,11 @@ + */ + + #include ++#include + #include + #include + #include ++#include + #include + #include + #include +@@ -70,6 +72,16 @@ int __init hv_common_init(void) + { + int i; + ++ /* ++ * Hyper-V expects to get crash register data or kmsg when ++ * crash enlightment is available and system crashes. Set ++ * crash_kexec_post_notifiers to be true to make sure that ++ * calling crash enlightment interface before running kdump ++ * kernel. 
++ */ ++ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) ++ crash_kexec_post_notifiers = true; ++ + /* + * Allocate the per-CPU state for the hypercall input arg. + * If this allocation fails, we will not be able to setup +@@ -204,6 +216,12 @@ bool hv_query_ext_cap(u64 cap_query) + } + EXPORT_SYMBOL_GPL(hv_query_ext_cap); + ++bool hv_is_hibernation_supported(void) ++{ ++ return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); ++} ++EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); ++ + /* These __weak functions provide default "no-op" behavior and + * may be overridden by architecture specific versions. Architectures + * for which the default "no-op" behavior is sufficient can leave +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Tue, 13 Jul 2021 17:01:46 -0700 +Subject: [PATCH 52/53] drivers: hv: Decouple Hyper-V clock/timer code from + VMbus drivers + +Hyper-V clock/timer code in hyperv_timer.c is mostly independent from +other VMbus drivers, but building for ARM64 without hyperv_timer.c +shows some remaining entanglements. A default implementation of +hv_read_reference_counter can just read a Hyper-V synthetic register +and be independent of hyperv_timer.c, so move this code out and into +hv_common.c. Then it can be used by the timesync driver even if +hyperv_timer.c isn't built on a particular architecture. If +hyperv_timer.c *is* built, it can override with a faster implementation. + +Also provide stubs for stimer functions called by the VMbus driver when +hyperv_timer.c isn't built. + +No functional changes. 
+ +Signed-off-by: Michael Kelley +Link: https://lore.kernel.org/r/1626220906-22629-1-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +(cherry picked from commit 31e5e64694cf9879e63b2802007fa934f4131126) +--- + drivers/clocksource/hyperv_timer.c | 3 --- + drivers/hv/hv_common.c | 14 ++++++++++++++ + drivers/hv/hv_util.c | 5 ----- + include/asm-generic/mshyperv.h | 2 ++ + include/clocksource/hyperv_timer.h | 11 +++++++++-- + 5 files changed, 25 insertions(+), 10 deletions(-) + +diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c +index 150a3f7174a9..6b59c0492cdd 100644 +--- a/drivers/clocksource/hyperv_timer.c ++++ b/drivers/clocksource/hyperv_timer.c +@@ -361,9 +361,6 @@ EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); + * Hyper-V and 32-bit x86. The TSC reference page version is preferred. + */ + +-u64 (*hv_read_reference_counter)(void); +-EXPORT_SYMBOL_GPL(hv_read_reference_counter); +- + static union { + struct ms_hyperv_tsc_page page; + u8 reserved[PAGE_SIZE]; +diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c +index 4d7fcff39b8c..e589ca2d3f38 100644 +--- a/drivers/hv/hv_common.c ++++ b/drivers/hv/hv_common.c +@@ -222,6 +222,20 @@ bool hv_is_hibernation_supported(void) + } + EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); + ++/* ++ * Default function to read the Hyper-V reference counter, independent ++ * of whether Hyper-V enlightened clocks/timers are being used. But on ++ * architectures where it is used, Hyper-V enlightenment code in ++ * hyperv_timer.c may override this function. ++ */ ++static u64 __hv_read_ref_counter(void) ++{ ++ return hv_get_register(HV_REGISTER_TIME_REF_COUNT); ++} ++ ++u64 (*hv_read_reference_counter)(void) = __hv_read_ref_counter; ++EXPORT_SYMBOL_GPL(hv_read_reference_counter); ++ + /* These __weak functions provide default "no-op" behavior and + * may be overridden by architecture specific versions. 
Architectures + * for which the default "no-op" behavior is sufficient can leave +diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c +index 1b914e418e41..83d6a1d80172 100644 +--- a/drivers/hv/hv_util.c ++++ b/drivers/hv/hv_util.c +@@ -17,7 +17,6 @@ + #include + #include + #include +-#include + #include + + #include "hyperv_vmbus.h" +@@ -681,10 +680,6 @@ static struct ptp_clock *hv_ptp_clock; + + static int hv_timesync_init(struct hv_util_service *srv) + { +- /* TimeSync requires Hyper-V clocksource. */ +- if (!hv_read_reference_counter) +- return -ENODEV; +- + spin_lock_init(&host_ts.lock); + + INIT_WORK(&adj_time_work, hv_set_host_time); +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +index 977166ff8672..ea87171e5b25 100644 +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -165,6 +165,8 @@ extern bool hv_root_partition; + extern u32 *hv_vp_index; + extern u32 hv_max_vp_index; + ++extern u64 (*hv_read_reference_counter)(void); ++ + /* Sentinel value for an uninitialized entry in hv_vp_index array */ + #define VP_INVAL U32_MAX + +diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h +index b6774aa5a4b8..b3f5d73ae1d6 100644 +--- a/include/clocksource/hyperv_timer.h ++++ b/include/clocksource/hyperv_timer.h +@@ -20,6 +20,8 @@ + #define HV_MAX_MAX_DELTA_TICKS 0xffffffff + #define HV_MIN_DELTA_TICKS 1 + ++#ifdef CONFIG_HYPERV_TIMER ++ + /* Routines called by the VMbus driver */ + extern int hv_stimer_alloc(bool have_percpu_irqs); + extern int hv_stimer_cleanup(unsigned int cpu); +@@ -28,8 +30,6 @@ extern void hv_stimer_legacy_cleanup(unsigned int cpu); + extern void hv_stimer_global_cleanup(void); + extern void hv_stimer0_isr(void); + +-#ifdef CONFIG_HYPERV_TIMER +-extern u64 (*hv_read_reference_counter)(void); + extern void hv_init_clocksource(void); + + extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void); +@@ -100,6 +100,13 @@ static inline u64 
hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, + { + return U64_MAX; + } ++ ++static inline int hv_stimer_cleanup(unsigned int cpu) { return 0; } ++static inline void hv_stimer_legacy_init(unsigned int cpu, int sint) {} ++static inline void hv_stimer_legacy_cleanup(unsigned int cpu) {} ++static inline void hv_stimer_global_cleanup(void) {} ++static inline void hv_stimer0_isr(void) {} ++ + #endif /* CONFIG_HYPERV_TIMER */ + + #endif +-- +2.18.4 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Pablo Greco +Date: Sat, 23 Oct 2021 15:42:26 +0000 +Subject: [PATCH 53/53] Avoid backporting + f39650de687e35766572ac89dbcd16a5911e2f0a + +--- + drivers/hv/hv_common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c +index e589ca2d3f38..3dc8e120b48d 100644 +--- a/drivers/hv/hv_common.c ++++ b/drivers/hv/hv_common.c +@@ -17,7 +17,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +-- +2.18.4 + diff --git a/SOURCES/kernel-aarch64-debug-fedora.config b/SOURCES/kernel-aarch64-debug-fedora.config index 0657f70..6d22bfc 100644 --- a/SOURCES/kernel-aarch64-debug-fedora.config +++ b/SOURCES/kernel-aarch64-debug-fedora.config @@ -8180,3 +8180,16 @@ CONFIG_ZYNQMP_IPI_MBOX=y CONFIG_ZYNQMP_PM_DOMAINS=y CONFIG_ZYNQMP_POWER=y CONFIG_FSL_MC_UAPI_SUPPORT=y +CONFIG_FB_HYPERV=m +CONFIG_HID_HYPERV_MOUSE=m +CONFIG_HYPERV_BALLOON=m +CONFIG_HYPERV_IOMMU=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_HYPERV_KEYBOARD=m +CONFIG_HYPERV=m +CONFIG_HYPERV_NET=m +CONFIG_HYPERV_STORAGE=m +CONFIG_HYPERV_UTILS=m +CONFIG_HYPERV_VSOCKETS=m +CONFIG_PCI_HYPERV=m +CONFIG_XEN_SYS_HYPERVISOR=y diff --git a/SOURCES/kernel-aarch64-fedora.config b/SOURCES/kernel-aarch64-fedora.config index 75d9545..a1c72d4 100644 --- a/SOURCES/kernel-aarch64-fedora.config +++ b/SOURCES/kernel-aarch64-fedora.config @@ -8158,3 +8158,16 @@ CONFIG_ZYNQMP_IPI_MBOX=y CONFIG_ZYNQMP_PM_DOMAINS=y 
CONFIG_ZYNQMP_POWER=y CONFIG_FSL_MC_UAPI_SUPPORT=y +CONFIG_FB_HYPERV=m +CONFIG_HID_HYPERV_MOUSE=m +CONFIG_HYPERV_BALLOON=m +CONFIG_HYPERV_IOMMU=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_HYPERV_KEYBOARD=m +CONFIG_HYPERV=m +CONFIG_HYPERV_NET=m +CONFIG_HYPERV_STORAGE=m +CONFIG_HYPERV_UTILS=m +CONFIG_HYPERV_VSOCKETS=m +CONFIG_PCI_HYPERV=m +CONFIG_XEN_SYS_HYPERVISOR=y diff --git a/SPECS/kernel.spec b/SPECS/kernel.spec index ffb99c3..5295831 100644 --- a/SPECS/kernel.spec +++ b/SPECS/kernel.spec @@ -878,6 +878,11 @@ Patch105: arm-dts-rpi-4-disable-wifi-frequencies.patch # END OF PATCH DEFINITIONS Patch10000: linux-5.10-lx2160a-network.patch +%ifarch aarch64 +Patch10001: azure.patch +%else +Source10001: azure.patch +%endif %endif @@ -3012,6 +3017,7 @@ fi %changelog * Sat Oct 23 2021 Pablo Greco - 5.10.75-200 - Linux v5.10.75 +- Add patches for azure * Sat Aug 21 2021 Pablo Greco - 5.10.60-200 - Linux v5.10.60