Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index b21ee65..89b1f74 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1,2 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-y := hv_init.o mmu.o nested.o
obj-$(CONFIG_X86_64) += hv_apic.o
+
+ifdef CONFIG_X86_64
+obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o
+endif
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 2c43e30..e01078e 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -20,7 +20,6 @@
*/
#include <linux/types.h>
-#include <linux/version.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/clockchips.h>
@@ -87,6 +86,11 @@
static void hv_apic_eoi_write(u32 reg, u32 val)
{
+ struct hv_vp_assist_page *hvp = hv_vp_assist_page[smp_processor_id()];
+
+ if (hvp && (xchg(&hvp->apic_assist, 0) & 0x1))
+ return;
+
wrmsr(HV_X64_MSR_EOI, val, 0);
}
@@ -256,11 +260,21 @@
}
if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) {
- pr_info("Hyper-V: Using MSR based APIC access\n");
+ pr_info("Hyper-V: Using enlightened APIC (%s mode)",
+ x2apic_enabled() ? "x2apic" : "xapic");
+ /*
+ * With x2apic, architectural x2apic MSRs are equivalent to the
+ * respective synthetic MSRs, so there's no need to override
+ * the apic accessors. The only exception is
+ * hv_apic_eoi_write, because it benefits from lazy EOI when
+ * available, but it works for both xapic and x2apic modes.
+ */
apic_set_eoi_write(hv_apic_eoi_write);
- apic->read = hv_apic_read;
- apic->write = hv_apic_write;
- apic->icr_write = hv_apic_icr_write;
- apic->icr_read = hv_apic_icr_read;
+ if (!x2apic_enabled()) {
+ apic->read = hv_apic_read;
+ apic->write = hv_apic_write;
+ apic->icr_write = hv_apic_icr_write;
+ apic->icr_read = hv_apic_icr_read;
+ }
}
}
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 20c876c..2db3972 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -1,22 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* X86 specific Hyper-V initialization code.
*
* Copyright (C) 2016, Microsoft, Inc.
*
* Author : K. Y. Srinivasan <kys@microsoft.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
*/
+#include <linux/efi.h>
#include <linux/types.h>
#include <asm/apic.h>
#include <asm/desc.h>
@@ -26,64 +17,13 @@
#include <linux/version.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
-#include <linux/clockchips.h>
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
-
-#ifdef CONFIG_HYPERV_TSCPAGE
-
-static struct ms_hyperv_tsc_page *tsc_pg;
-
-struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
-{
- return tsc_pg;
-}
-EXPORT_SYMBOL_GPL(hv_get_tsc_page);
-
-static u64 read_hv_clock_tsc(struct clocksource *arg)
-{
- u64 current_tick = hv_read_tsc_page(tsc_pg);
-
- if (current_tick == U64_MAX)
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
-
- return current_tick;
-}
-
-static struct clocksource hyperv_cs_tsc = {
- .name = "hyperv_clocksource_tsc_page",
- .rating = 400,
- .read = read_hv_clock_tsc,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-#endif
-
-static u64 read_hv_clock_msr(struct clocksource *arg)
-{
- u64 current_tick;
- /*
- * Read the partition counter to get the current tick count. This count
- * is set to 0 when the partition is created and is incremented in
- * 100 nanosecond units.
- */
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
- return current_tick;
-}
-
-static struct clocksource hyperv_cs_msr = {
- .name = "hyperv_clocksource_msr",
- .rating = 400,
- .read = read_hv_clock_msr,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
+#include <clocksource/hyperv_timer.h>
void *hv_hypercall_pg;
EXPORT_SYMBOL_GPL(hv_hypercall_pg);
-struct clocksource *hyperv_cs;
-EXPORT_SYMBOL_GPL(hyperv_cs);
u32 *hv_vp_index;
EXPORT_SYMBOL_GPL(hv_vp_index);
@@ -95,15 +35,34 @@
EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
u32 hv_max_vp_index;
+EXPORT_SYMBOL_GPL(hv_max_vp_index);
+
+void *hv_alloc_hyperv_page(void)
+{
+ BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE);
+
+ return (void *)__get_free_page(GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
+
+void hv_free_hyperv_page(unsigned long addr)
+{
+ free_page(addr);
+}
+EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
static int hv_cpu_init(unsigned int cpu)
{
u64 msr_vp_index;
struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
void **input_arg;
+ struct page *pg;
input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
- *input_arg = page_address(alloc_page(GFP_KERNEL));
+ pg = alloc_page(GFP_KERNEL);
+ if (unlikely(!pg))
+ return -ENOMEM;
+ *input_arg = page_address(pg);
hv_get_vp_index(msr_vp_index);
@@ -115,8 +74,17 @@
if (!hv_vp_assist_page)
return 0;
- if (!*hvp)
- *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+ /*
+ * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
+ * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure
+ * we always write the EOI MSR in hv_apic_eoi_write() *after* the
+ * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may
+ * not be stopped in the case of CPU offlining and the VM will hang.
+ */
+ if (!*hvp) {
+ *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO,
+ PAGE_KERNEL);
+ }
if (*hvp) {
u64 val;
@@ -253,6 +221,22 @@
return 0;
}
+static int __init hv_pci_init(void)
+{
+ int gen2vm = efi_enabled(EFI_BOOT);
+
+ /*
+ * For Generation-2 VM, we exit from pci_arch_init() by returning 0.
+ * The purpose is to suppress the harmless warning:
+ * "PCI: Fatal: No config space access function found"
+ */
+ if (gen2vm)
+ return 0;
+
+ /* For Generation-1 VM, we'll proceed in pci_arch_init(). */
+ return 1;
+}
+
/*
* This function is to be invoked early in the boot sequence after the
* hypervisor has been detected.
@@ -329,41 +313,7 @@
hv_apic_init();
- /*
- * Register Hyper-V specific clocksource.
- */
-#ifdef CONFIG_HYPERV_TSCPAGE
- if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) {
- union hv_x64_msr_hypercall_contents tsc_msr;
-
- tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
- if (!tsc_pg)
- goto register_msr_cs;
-
- hyperv_cs = &hyperv_cs_tsc;
-
- rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
-
- tsc_msr.enable = 1;
- tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
-
- wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
-
- hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK;
-
- clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
- return;
- }
-register_msr_cs:
-#endif
- /*
- * For 32 bit guests just use the MSR based mechanism for reading
- * the partition counter.
- */
-
- hyperv_cs = &hyperv_cs_msr;
- if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)
- clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
+ x86_init.pci.arch_init = hv_pci_init;
return;
@@ -387,6 +337,13 @@
/* Reset our OS id */
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
+ /*
+ * Reset hypercall page reference before reset the page,
+ * let hypercall operations fail safely rather than
+ * panic the kernel for using invalid hypercall page
+ */
+ hv_hypercall_pg = NULL;
+
/* Reset the hypercall page */
hypercall_msr.as_uint64 = 0;
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c
new file mode 100644
index 0000000..07f21a0
--- /dev/null
+++ b/arch/x86/hyperv/hv_spinlock.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Hyper-V specific spinlock code.
+ *
+ * Copyright (C) 2018, Intel, Inc.
+ *
+ * Author : Yi Sun <yi.y.sun@intel.com>
+ */
+
+#define pr_fmt(fmt) "Hyper-V: " fmt
+
+#include <linux/spinlock.h>
+
+#include <asm/mshyperv.h>
+#include <asm/paravirt.h>
+#include <asm/apic.h>
+
+static bool __initdata hv_pvspin = true;
+
+static void hv_qlock_kick(int cpu)
+{
+ apic->send_IPI(cpu, X86_PLATFORM_IPI_VECTOR);
+}
+
+static void hv_qlock_wait(u8 *byte, u8 val)
+{
+ unsigned long msr_val;
+ unsigned long flags;
+
+ if (in_nmi())
+ return;
+
+ /*
+ * Reading HV_X64_MSR_GUEST_IDLE MSR tells the hypervisor that the
+ * vCPU can be put into 'idle' state. This 'idle' state is
+ * terminated by an IPI, usually from hv_qlock_kick(), even if
+ * interrupts are disabled on the vCPU.
+ *
+ * To prevent a race against the unlock path it is required to
+ * disable interrupts before accessing the HV_X64_MSR_GUEST_IDLE
+ * MSR. Otherwise, if the IPI from hv_qlock_kick() arrives between
+ * the lock value check and the rdmsrl() then the vCPU might be put
+ * into 'idle' state by the hypervisor and kept in that state for
+ * an unspecified amount of time.
+ */
+ local_irq_save(flags);
+ /*
+ * Only issue the rdmsrl() when the lock state has not changed.
+ */
+ if (READ_ONCE(*byte) == val)
+ rdmsrl(HV_X64_MSR_GUEST_IDLE, msr_val);
+ local_irq_restore(flags);
+}
+
+/*
+ * Hyper-V does not support this so far.
+ */
+__visible bool hv_vcpu_is_preempted(int vcpu)
+{
+ return false;
+}
+PV_CALLEE_SAVE_REGS_THUNK(hv_vcpu_is_preempted);
+
+void __init hv_init_spinlocks(void)
+{
+ if (!hv_pvspin || !apic ||
+ !(ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) ||
+ !(ms_hyperv.features & HV_X64_MSR_GUEST_IDLE_AVAILABLE)) {
+ pr_info("PV spinlocks disabled\n");
+ return;
+ }
+ pr_info("PV spinlocks enabled\n");
+
+ __pv_init_lock_hash();
+ pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+ pv_ops.lock.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+ pv_ops.lock.wait = hv_qlock_wait;
+ pv_ops.lock.kick = hv_qlock_kick;
+ pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(hv_vcpu_is_preempted);
+}
+
+static __init int hv_parse_nopvspin(char *arg)
+{
+ hv_pvspin = false;
+ return 0;
+}
+early_param("hv_nopvspin", hv_parse_nopvspin);
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index ef5f29f..5208ba4 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -37,12 +37,14 @@
* Lower 12 bits encode the number of additional
* pages to flush (in addition to the 'cur' page).
*/
- if (diff >= HV_TLB_FLUSH_UNIT)
+ if (diff >= HV_TLB_FLUSH_UNIT) {
gva_list[gva_n] |= ~PAGE_MASK;
- else if (diff)
+ cur += HV_TLB_FLUSH_UNIT;
+ } else if (diff) {
gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+ cur = end;
+ }
- cur += HV_TLB_FLUSH_UNIT;
gva_n++;
} while (cur < end);
@@ -231,6 +233,6 @@
return;
pr_info("Using hypercall for remote TLB flush\n");
- pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
- pv_mmu_ops.tlb_remove_table = tlb_remove_table;
+ pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others;
+ pv_ops.mmu.tlb_remove_table = tlb_remove_table;
}
diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c
index b8e60cc..dd0a843 100644
--- a/arch/x86/hyperv/nested.c
+++ b/arch/x86/hyperv/nested.c
@@ -7,6 +7,7 @@
*
* Author : Lan Tianyu <Tianyu.Lan@microsoft.com>
*/
+#define pr_fmt(fmt) "Hyper-V: " fmt
#include <linux/types.h>
@@ -54,3 +55,82 @@
return ret;
}
EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);
+
+int hyperv_fill_flush_guest_mapping_list(
+ struct hv_guest_mapping_flush_list *flush,
+ u64 start_gfn, u64 pages)
+{
+ u64 cur = start_gfn;
+ u64 additional_pages;
+ int gpa_n = 0;
+
+ do {
+ /*
+ * If flush requests exceed max flush count, go back to
+ * flush tlbs without range.
+ */
+ if (gpa_n >= HV_MAX_FLUSH_REP_COUNT)
+ return -ENOSPC;
+
+ additional_pages = min_t(u64, pages, HV_MAX_FLUSH_PAGES) - 1;
+
+ flush->gpa_list[gpa_n].page.additional_pages = additional_pages;
+ flush->gpa_list[gpa_n].page.largepage = false;
+ flush->gpa_list[gpa_n].page.basepfn = cur;
+
+ pages -= additional_pages + 1;
+ cur += additional_pages + 1;
+ gpa_n++;
+ } while (pages > 0);
+
+ return gpa_n;
+}
+EXPORT_SYMBOL_GPL(hyperv_fill_flush_guest_mapping_list);
+
+int hyperv_flush_guest_mapping_range(u64 as,
+ hyperv_fill_flush_list_func fill_flush_list_func, void *data)
+{
+ struct hv_guest_mapping_flush_list **flush_pcpu;
+ struct hv_guest_mapping_flush_list *flush;
+ u64 status = 0;
+ unsigned long flags;
+ int ret = -ENOTSUPP;
+ int gpa_n = 0;
+
+ if (!hv_hypercall_pg || !fill_flush_list_func)
+ goto fault;
+
+ local_irq_save(flags);
+
+ flush_pcpu = (struct hv_guest_mapping_flush_list **)
+ this_cpu_ptr(hyperv_pcpu_input_arg);
+
+ flush = *flush_pcpu;
+ if (unlikely(!flush)) {
+ local_irq_restore(flags);
+ goto fault;
+ }
+
+ flush->address_space = as;
+ flush->flags = 0;
+
+ gpa_n = fill_flush_list_func(flush, data);
+ if (gpa_n < 0) {
+ local_irq_restore(flags);
+ goto fault;
+ }
+
+ status = hv_do_rep_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST,
+ gpa_n, 0, flush, NULL);
+
+ local_irq_restore(flags);
+
+ if (!(status & HV_HYPERCALL_RESULT_MASK))
+ ret = 0;
+ else
+ ret = status;
+fault:
+ trace_hyperv_nested_flush_guest_mapping_range(as, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping_range);