Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index c1f98f3..ba5a418 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -7,6 +7,7 @@
bool "Xen guest support"
depends on PARAVIRT
select PARAVIRT_CLOCK
+ select X86_HV_CALLBACK_VECTOR
depends on X86_64 || (X86_32 && X86_PAE)
depends on X86_LOCAL_APIC && X86_TSC
help
@@ -18,6 +19,7 @@
bool "Xen PV guest support"
default y
depends on XEN
+ select PARAVIRT_XXL
select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
help
@@ -68,12 +70,12 @@
config XEN_DEBUG_FS
bool "Enable Xen debug and tuning parameters in debugfs"
depends on XEN && DEBUG_FS
- default n
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
config XEN_PVH
- bool "Support for running as a PVH guest"
+ bool "Support for running as a Xen PVH guest"
depends on XEN && XEN_PVHVM && ACPI
+ select PVH
def_bool n
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index d83cb54..084de77 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
-OBJECT_FILES_NON_STANDARD_xen-pvh.o := y
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
@@ -12,25 +11,45 @@
# Make sure early boot has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_enlighten_pv.o := $(nostackp)
-CFLAGS_mmu_pv.o := $(nostackp)
+CFLAGS_mmu_pv.o := $(nostackp)
-obj-y := enlighten.o multicalls.o mmu.o irq.o \
- time.o xen-asm.o xen-asm_$(BITS).o \
- grant-table.o suspend.o platform-pci-unplug.o
+obj-y += enlighten.o
+obj-y += mmu.o
+obj-y += time.o
+obj-y += grant-table.o
+obj-y += suspend.o
-obj-$(CONFIG_XEN_PVHVM) += enlighten_hvm.o mmu_hvm.o suspend_hvm.o
-obj-$(CONFIG_XEN_PV) += setup.o apic.o pmu.o suspend_pv.o \
- p2m.o enlighten_pv.o mmu_pv.o
-obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o
+obj-$(CONFIG_XEN_PVHVM) += enlighten_hvm.o
+obj-$(CONFIG_XEN_PVHVM) += mmu_hvm.o
+obj-$(CONFIG_XEN_PVHVM) += suspend_hvm.o
+obj-$(CONFIG_XEN_PVHVM) += platform-pci-unplug.o
-obj-$(CONFIG_EVENT_TRACING) += trace.o
+obj-$(CONFIG_XEN_PV) += setup.o
+obj-$(CONFIG_XEN_PV) += apic.o
+obj-$(CONFIG_XEN_PV) += pmu.o
+obj-$(CONFIG_XEN_PV) += suspend_pv.o
+obj-$(CONFIG_XEN_PV) += p2m.o
+obj-$(CONFIG_XEN_PV) += enlighten_pv.o
+obj-$(CONFIG_XEN_PV) += mmu_pv.o
+obj-$(CONFIG_XEN_PV) += irq.o
+obj-$(CONFIG_XEN_PV) += multicalls.o
+obj-$(CONFIG_XEN_PV) += xen-asm.o
+obj-$(CONFIG_XEN_PV) += xen-asm_$(BITS).o
+
+obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o
+
+obj-$(CONFIG_EVENT_TRACING) += trace.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_XEN_PV_SMP) += smp_pv.o
obj-$(CONFIG_XEN_PVHVM_SMP) += smp_hvm.o
+
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
+
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
+
obj-$(CONFIG_XEN_DOM0) += vga.o
+
obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
+
obj-$(CONFIG_XEN_EFI) += efi.o
-obj-$(CONFIG_XEN_PVH) += xen-pvh.o
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index 13da879..5324109 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -9,13 +9,8 @@
struct dentry * __init xen_init_debugfs(void)
{
- if (!d_xen_debug) {
+ if (!d_xen_debug)
d_xen_debug = debugfs_create_dir("xen", NULL);
-
- if (!d_xen_debug)
- pr_warning("Could not create 'xen' debugfs directory\n");
- }
-
return d_xen_debug;
}
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index 1804b27..a04551e 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2014 Oracle Co., Daniel Kiper
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/bitops.h>
@@ -69,19 +57,7 @@
return NULL;
/* Here we know that Xen runs on EFI platform. */
-
- efi.get_time = xen_efi_get_time;
- efi.set_time = xen_efi_set_time;
- efi.get_wakeup_time = xen_efi_get_wakeup_time;
- efi.set_wakeup_time = xen_efi_set_wakeup_time;
- efi.get_variable = xen_efi_get_variable;
- efi.get_next_variable = xen_efi_get_next_variable;
- efi.set_variable = xen_efi_set_variable;
- efi.query_variable_info = xen_efi_query_variable_info;
- efi.update_capsule = xen_efi_update_capsule;
- efi.query_capsule_caps = xen_efi_query_capsule_caps;
- efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
- efi.reset_system = xen_efi_reset_system;
+ xen_efi_runtime_setup();
efi_systab_xen.tables = info->cfg.addr;
efi_systab_xen.nr_tables = info->cfg.nent;
@@ -170,7 +146,7 @@
return efi_secureboot_mode_unknown;
}
-void __init xen_efi_init(void)
+void __init xen_efi_init(struct boot_params *boot_params)
{
efi_system_table_t *efi_systab_xen;
@@ -179,12 +155,12 @@
if (efi_systab_xen == NULL)
return;
- strncpy((char *)&boot_params.efi_info.efi_loader_signature, "Xen",
- sizeof(boot_params.efi_info.efi_loader_signature));
- boot_params.efi_info.efi_systab = (__u32)__pa(efi_systab_xen);
- boot_params.efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32);
+ strncpy((char *)&boot_params->efi_info.efi_loader_signature, "Xen",
+ sizeof(boot_params->efi_info.efi_loader_signature));
+ boot_params->efi_info.efi_systab = (__u32)__pa(efi_systab_xen);
+ boot_params->efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32);
- boot_params.secure_boot = xen_efi_get_secureboot();
+ boot_params->secure_boot = xen_efi_get_secureboot();
set_bit(EFI_BOOT, &efi.flags);
set_bit(EFI_PARAVIRT, &efi.flags);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c6c7c9b..205b117 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1,10 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#endif
#include <linux/cpu.h>
#include <linux/kexec.h>
#include <linux/slab.h>
+#include <xen/xen.h>
#include <xen/features.h>
#include <xen/page.h>
@@ -266,19 +269,41 @@
BUG();
}
+static int reboot_reason = SHUTDOWN_reboot;
+static bool xen_legacy_crash;
void xen_emergency_restart(void)
{
- xen_reboot(SHUTDOWN_reboot);
+ xen_reboot(reboot_reason);
}
static int
xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- if (!kexec_crash_loaded())
- xen_reboot(SHUTDOWN_crash);
+ if (!kexec_crash_loaded()) {
+ if (xen_legacy_crash)
+ xen_reboot(SHUTDOWN_crash);
+
+ reboot_reason = SHUTDOWN_crash;
+
+ /*
+ * If panic_timeout==0 then we are supposed to wait forever.
+ * However, to preserve original dom0 behavior we have to drop
+ * into hypervisor. (domU behavior is controlled by its
+ * config file)
+ */
+ if (panic_timeout == 0)
+ panic_timeout = -1;
+ }
return NOTIFY_DONE;
}
+static int __init parse_xen_legacy_crash(char *arg)
+{
+ xen_legacy_crash = true;
+ return 0;
+}
+early_param("xen_legacy_crash", parse_xen_legacy_crash);
+
static struct notifier_block xen_panic_block = {
.notifier_call = xen_panic_event,
.priority = INT_MIN
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 19c1ff5..e138f7d 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/kexec.h>
@@ -208,18 +210,18 @@
#endif
}
-static bool xen_nopv;
static __init int xen_parse_nopv(char *arg)
{
- xen_nopv = true;
- return 0;
+ pr_notice("\"xen_nopv\" is deprecated, please use \"nopv\" instead\n");
+
+ if (xen_cpuid_base())
+ nopv = true;
+ return 0;
}
early_param("xen_nopv", xen_parse_nopv);
-bool xen_hvm_need_lapic(void)
+bool __init xen_hvm_need_lapic(void)
{
- if (xen_nopv)
- return false;
if (xen_pv_domain())
return false;
if (!xen_hvm_domain())
@@ -228,15 +230,6 @@
return false;
return true;
}
-EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
-
-static uint32_t __init xen_platform_hvm(void)
-{
- if (xen_pv_domain() || xen_nopv)
- return 0;
-
- return xen_cpuid_base();
-}
static __init void xen_hvm_guest_late_init(void)
{
@@ -249,6 +242,9 @@
/* PVH detected. */
xen_pvh = true;
+ if (nopv)
+ panic("\"nopv\" and \"xen_nopv\" parameters are unsupported in PVH guest.");
+
/* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */
if (!nr_ioapics && acpi_irq_model == ACPI_IRQ_MODEL_PIC)
acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
@@ -258,7 +254,38 @@
#endif
}
-const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = {
+static uint32_t __init xen_platform_hvm(void)
+{
+ uint32_t xen_domain = xen_cpuid_base();
+ struct x86_hyper_init *h = &x86_hyper_xen_hvm.init;
+
+ if (xen_pv_domain())
+ return 0;
+
+ if (xen_pvh_domain() && nopv) {
+ /* Guest booting via the Xen-PVH boot entry goes here */
+ pr_info("\"nopv\" parameter is ignored in PVH guest\n");
+ nopv = false;
+ } else if (nopv && xen_domain) {
+ /*
+ * Guest booting via normal boot entry (like via grub2) goes
+ * here.
+ *
+ * Use interface functions for bare hardware if nopv,
+ * xen_hvm_guest_late_init is an exception as we need to
+ * detect PVH and panic there.
+ */
+ h->init_platform = x86_init_noop;
+ h->x2apic_available = bool_x86_init_noop;
+ h->init_mem_mapping = x86_init_noop;
+ h->init_after_bootmem = x86_init_noop;
+ h->guest_late_init = xen_hvm_guest_late_init;
+ x86_hyper_xen_hvm.runtime.pin_vcpu = x86_op_int_noop;
+ }
+ return xen_domain;
+}
+
+struct hypervisor_x86 x86_hyper_xen_hvm __initdata = {
.name = "Xen HVM",
.detect = xen_platform_hvm,
.type = X86_HYPER_XEN_HVM,
@@ -267,4 +294,5 @@
.init.init_mem_mapping = xen_hvm_init_mem_mapping,
.init.guest_late_init = xen_hvm_guest_late_init,
.runtime.pin_vcpu = xen_pin_vcpu,
+ .ignore_nopv = true,
};
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 52a7c3f..5bfea37 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -23,7 +23,7 @@
#include <linux/start_kernel.h>
#include <linux/sched.h>
#include <linux/kprobes.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
@@ -31,7 +31,6 @@
#include <linux/console.h>
#include <linux/pci.h>
#include <linux/gfp.h>
-#include <linux/memblock.h>
#include <linux/edd.h>
#include <linux/frame.h>
@@ -118,6 +117,14 @@
printk(KERN_INFO "Xen version: %d.%d%s%s\n",
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
+
+#ifdef CONFIG_X86_32
+ pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"
+ "Support for running as 32-bit PV-guest under Xen will soon be removed\n"
+ "from the Linux kernel!\n"
+ "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n"
+ "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n");
+#endif
}
static void __init xen_pv_init_platform(void)
@@ -597,12 +604,12 @@
static struct trap_array_entry trap_array[] = {
{ debug, xen_xendebug, true },
- { int3, xen_xenint3, true },
{ double_fault, xen_double_fault, true },
#ifdef CONFIG_X86_MCE
{ machine_check, xen_machine_check, true },
#endif
{ nmi, xen_xennmi, true },
+ { int3, xen_int3, false },
{ overflow, xen_overflow, false },
#ifdef CONFIG_IA32_EMULATION
{ entry_INT80_compat, xen_entry_INT80_compat, false },
@@ -878,16 +885,6 @@
native_write_cr4(cr4);
}
-#ifdef CONFIG_X86_64
-static inline unsigned long xen_read_cr8(void)
-{
- return 0;
-}
-static inline void xen_write_cr8(unsigned long val)
-{
- BUG_ON(val);
-}
-#endif
static u64 xen_read_msr_safe(unsigned int msr, int *err)
{
@@ -899,10 +896,7 @@
val = native_read_msr_safe(msr, err);
switch (msr) {
case MSR_IA32_APICBASE:
-#ifdef CONFIG_X86_X2APIC
- if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_X2APIC & 31))))
-#endif
- val &= ~X2APIC_ENABLE;
+ val &= ~X2APIC_ENABLE;
break;
}
return val;
@@ -995,11 +989,15 @@
* percpu area for all cpus, so make use of it.
*/
if (xen_have_vcpu_info_placement) {
- pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
- pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
- pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
- pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
- pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
+ pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
+ pv_ops.irq.restore_fl =
+ __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
+ pv_ops.irq.irq_disable =
+ __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
+ pv_ops.irq.irq_enable =
+ __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
+ pv_ops.mmu.read_cr2 =
+ __PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
}
}
@@ -1023,11 +1021,6 @@
.write_cr4 = xen_write_cr4,
-#ifdef CONFIG_X86_64
- .read_cr8 = xen_read_cr8,
- .write_cr8 = xen_write_cr8,
-#endif
-
.wbinvd = native_wbinvd,
.read_msr = xen_read_msr,
@@ -1174,14 +1167,14 @@
*/
static void __init xen_setup_gdt(int cpu)
{
- pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
- pv_cpu_ops.load_gdt = xen_load_gdt_boot;
+ pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
+ pv_ops.cpu.load_gdt = xen_load_gdt_boot;
setup_stack_canary_segment(cpu);
switch_to_new_gdt(cpu);
- pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;
- pv_cpu_ops.load_gdt = xen_load_gdt;
+ pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
+ pv_ops.cpu.load_gdt = xen_load_gdt;
}
static void __init xen_dom0_set_legacy_features(void)
@@ -1206,8 +1199,8 @@
/* Install Xen paravirt ops */
pv_info = xen_info;
- pv_init_ops.patch = paravirt_patch_default;
- pv_cpu_ops = xen_cpu_ops;
+ pv_ops.init.patch = paravirt_patch_default;
+ pv_ops.cpu = xen_cpu_ops;
xen_init_irq_ops();
/*
@@ -1276,8 +1269,10 @@
#endif
if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
- pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
- pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
+ pv_ops.mmu.ptep_modify_prot_start =
+ xen_ptep_modify_prot_start;
+ pv_ops.mmu.ptep_modify_prot_commit =
+ xen_ptep_modify_prot_commit;
}
machine_ops = xen_machine_ops;
@@ -1402,7 +1397,7 @@
/* We need this for printk timestamps */
xen_setup_runstate_info(0);
- xen_efi_init();
+ xen_efi_init(&boot_params);
/* Start the world */
#ifdef CONFIG_X86_32
@@ -1462,4 +1457,5 @@
.detect = xen_platform_pv,
.type = X86_HYPER_XEN_PV,
.runtime.pin_vcpu = xen_pin_vcpu,
+ .ignore_nopv = true,
};
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index f7f7702..80a79db 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -6,102 +6,50 @@
#include <asm/io_apic.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>
-#include <asm/x86_init.h>
+#include <xen/xen.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
#include <xen/interface/memory.h>
-#include <xen/interface/hvm/start_info.h>
+
+#include "xen-ops.h"
/*
* PVH variables.
*
- * xen_pvh pvh_bootparams and pvh_start_info need to live in data segment
- * since they are used after startup_{32|64}, which clear .bss, are invoked.
+ * The variable xen_pvh needs to live in the data segment since it is used
+ * after startup_{32|64} is invoked, which will clear the .bss segment.
*/
bool xen_pvh __attribute__((section(".data"))) = 0;
-struct boot_params pvh_bootparams __attribute__((section(".data")));
-struct hvm_start_info pvh_start_info __attribute__((section(".data")));
-unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
-
-static u64 pvh_get_root_pointer(void)
-{
- return pvh_start_info.rsdp_paddr;
-}
-
-static void __init init_pvh_bootparams(void)
-{
- struct xen_memory_map memmap;
- int rc;
-
- memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
-
- memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
- set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
- rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
- if (rc) {
- xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
- BUG();
- }
- pvh_bootparams.e820_entries = memmap.nr_entries;
-
- if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
- pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
- ISA_START_ADDRESS;
- pvh_bootparams.e820_table[pvh_bootparams.e820_entries].size =
- ISA_END_ADDRESS - ISA_START_ADDRESS;
- pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
- E820_TYPE_RESERVED;
- pvh_bootparams.e820_entries++;
- } else
- xen_raw_printk("Warning: Can fit ISA range into e820\n");
-
- pvh_bootparams.hdr.cmd_line_ptr =
- pvh_start_info.cmdline_paddr;
-
- /* The first module is always ramdisk. */
- if (pvh_start_info.nr_modules) {
- struct hvm_modlist_entry *modaddr =
- __va(pvh_start_info.modlist_paddr);
- pvh_bootparams.hdr.ramdisk_image = modaddr->paddr;
- pvh_bootparams.hdr.ramdisk_size = modaddr->size;
- }
-
- /*
- * See Documentation/x86/boot.txt.
- *
- * Version 2.12 supports Xen entry point but we will use default x86/PC
- * environment (i.e. hardware_subarch 0).
- */
- pvh_bootparams.hdr.version = (2 << 8) | 12;
- pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
-
- x86_init.acpi.get_root_pointer = pvh_get_root_pointer;
-}
-
-/*
- * This routine (and those that it might call) should not use
- * anything that lives in .bss since that segment will be cleared later.
- */
-void __init xen_prepare_pvh(void)
+void __init xen_pvh_init(struct boot_params *boot_params)
{
u32 msr;
u64 pfn;
- if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
- xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
- pvh_start_info.magic);
- BUG();
- }
-
xen_pvh = 1;
+ xen_domain_type = XEN_HVM_DOMAIN;
xen_start_flags = pvh_start_info.flags;
msr = cpuid_ebx(xen_cpuid_base() + 2);
pfn = __pa(hypercall_page);
wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
- init_pvh_bootparams();
+ xen_efi_init(boot_params);
+}
+
+void __init mem_map_via_hcall(struct boot_params *boot_params_p)
+{
+ struct xen_memory_map memmap;
+ int rc;
+
+ memmap.nr_entries = ARRAY_SIZE(boot_params_p->e820_table);
+ set_xen_guest_handle(memmap.buffer, boot_params_p->e820_table);
+ rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+ if (rc) {
+ xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
+ BUG();
+ }
+ boot_params_p->e820_entries = memmap.nr_entries;
}
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 92ccc71..ecb0d54 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/******************************************************************************
* grant_table.c
* x86 specific part
@@ -8,30 +9,6 @@
* Copyright (c) 2004-2005, K A Fraser
* Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan. Split out x86 specific part.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
*/
#include <linux/sched.h>
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 7515a19..850c93f 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -128,6 +128,6 @@
void __init xen_init_irq_ops(void)
{
- pv_irq_ops = xen_irq_ops;
+ pv_ops.irq = xen_irq_ops;
x86_init.irqs.intr_init = xen_init_IRQ;
}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 96fc2f0..60e9c37 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
#include <linux/pfn.h>
#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
@@ -6,12 +8,6 @@
#include "multicalls.h"
#include "mmu.h"
-/*
- * Protects atomic reservation decrease/increase against concurrent increases.
- * Also protects non-atomic updates of current_pages and balloon lists.
- */
-DEFINE_SPINLOCK(xen_reservation_lock);
-
unsigned long arbitrary_virt_to_mfn(void *vaddr)
{
xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);
@@ -42,186 +38,6 @@
}
EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
-static noinline void xen_flush_tlb_all(void)
-{
- struct mmuext_op *op;
- struct multicall_space mcs;
-
- preempt_disable();
-
- mcs = xen_mc_entry(sizeof(*op));
-
- op = mcs.args;
- op->cmd = MMUEXT_TLB_FLUSH_ALL;
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-
- preempt_enable();
-}
-
-#define REMAP_BATCH_SIZE 16
-
-struct remap_data {
- xen_pfn_t *pfn;
- bool contiguous;
- bool no_translate;
- pgprot_t prot;
- struct mmu_update *mmu_update;
-};
-
-static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token,
- unsigned long addr, void *data)
-{
- struct remap_data *rmd = data;
- pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
-
- /*
- * If we have a contiguous range, just update the pfn itself,
- * else update pointer to be "next pfn".
- */
- if (rmd->contiguous)
- (*rmd->pfn)++;
- else
- rmd->pfn++;
-
- rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
- rmd->mmu_update->ptr |= rmd->no_translate ?
- MMU_PT_UPDATE_NO_TRANSLATE :
- MMU_NORMAL_PT_UPDATE;
- rmd->mmu_update->val = pte_val_ma(pte);
- rmd->mmu_update++;
-
- return 0;
-}
-
-static int do_remap_pfn(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t *pfn, int nr,
- int *err_ptr, pgprot_t prot,
- unsigned int domid,
- bool no_translate,
- struct page **pages)
-{
- int err = 0;
- struct remap_data rmd;
- struct mmu_update mmu_update[REMAP_BATCH_SIZE];
- unsigned long range;
- int mapped = 0;
-
- BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
-
- rmd.pfn = pfn;
- rmd.prot = prot;
- /*
- * We use the err_ptr to indicate if there we are doing a contiguous
- * mapping or a discontigious mapping.
- */
- rmd.contiguous = !err_ptr;
- rmd.no_translate = no_translate;
-
- while (nr) {
- int index = 0;
- int done = 0;
- int batch = min(REMAP_BATCH_SIZE, nr);
- int batch_left = batch;
- range = (unsigned long)batch << PAGE_SHIFT;
-
- rmd.mmu_update = mmu_update;
- err = apply_to_page_range(vma->vm_mm, addr, range,
- remap_area_pfn_pte_fn, &rmd);
- if (err)
- goto out;
-
- /* We record the error for each page that gives an error, but
- * continue mapping until the whole set is done */
- do {
- int i;
-
- err = HYPERVISOR_mmu_update(&mmu_update[index],
- batch_left, &done, domid);
-
- /*
- * @err_ptr may be the same buffer as @gfn, so
- * only clear it after each chunk of @gfn is
- * used.
- */
- if (err_ptr) {
- for (i = index; i < index + done; i++)
- err_ptr[i] = 0;
- }
- if (err < 0) {
- if (!err_ptr)
- goto out;
- err_ptr[i] = err;
- done++; /* Skip failed frame. */
- } else
- mapped += done;
- batch_left -= done;
- index += done;
- } while (batch_left);
-
- nr -= batch;
- addr += range;
- if (err_ptr)
- err_ptr += batch;
- cond_resched();
- }
-out:
-
- xen_flush_tlb_all();
-
- return err < 0 ? err : mapped;
-}
-
-int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t gfn, int nr,
- pgprot_t prot, unsigned domid,
- struct page **pages)
-{
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return -EOPNOTSUPP;
-
- return do_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false,
- pages);
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range);
-
-int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t *gfn, int nr,
- int *err_ptr, pgprot_t prot,
- unsigned domid, struct page **pages)
-{
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
- prot, domid, pages);
-
- /* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
- * and the consequences later is quite hard to detect what the actual
- * cause of "wrong memory was mapped in".
- */
- BUG_ON(err_ptr == NULL);
- return do_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid,
- false, pages);
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array);
-
-int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t *mfn, int nr,
- int *err_ptr, pgprot_t prot,
- unsigned int domid, struct page **pages)
-{
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return -EOPNOTSUPP;
-
- return do_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid,
- true, pages);
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
-
/* Returns: 0 success */
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
int nr, struct page **pages)
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index a7e47cf..6e4c6bd 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -17,8 +17,8 @@
void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+pte_t xen_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
+void xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep, pte_t pte);
unsigned long xen_read_cr2_direct(void);
diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c
index dd2ad82..5740937 100644
--- a/arch/x86/xen/mmu_hvm.c
+++ b/arch/x86/xen/mmu_hvm.c
@@ -73,7 +73,7 @@
void __init xen_hvm_init_mmu_ops(void)
{
if (is_pagetable_dying_supported())
- pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
+ pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap;
#ifdef CONFIG_PROC_VMCORE
WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram));
#endif
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 2c84c6a..c8dbee6 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
/*
* Xen mmu operations
*
@@ -99,6 +101,12 @@
#endif /* CONFIG_X86_64 */
/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and balloon lists.
+ */
+static DEFINE_SPINLOCK(xen_reservation_lock);
+
+/*
* Note about cr3 (pagetable base) values:
*
* xen_cr3 contains the current logical cr3 value; it contains the
@@ -298,20 +306,20 @@
__xen_set_pte(ptep, pteval);
}
-pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
+pte_t xen_ptep_modify_prot_start(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
/* Just return the pte as-is. We preserve the bits on commit */
- trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep);
+ trace_xen_mmu_ptep_modify_prot_start(vma->vm_mm, addr, ptep, *ptep);
return *ptep;
}
-void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+void xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep, pte_t pte)
{
struct mmu_update u;
- trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte);
+ trace_xen_mmu_ptep_modify_prot_commit(vma->vm_mm, addr, ptep, pte);
xen_mc_batch();
u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
@@ -640,19 +648,20 @@
unsigned long limit)
{
int i, nr, flush = 0;
- unsigned hole_low, hole_high;
+ unsigned hole_low = 0, hole_high = 0;
/* The limit is the last byte to be touched */
limit--;
BUG_ON(limit >= FIXADDR_TOP);
+#ifdef CONFIG_X86_64
/*
* 64-bit has a great big hole in the middle of the address
- * space, which contains the Xen mappings. On 32-bit these
- * will end up making a zero-sized hole and so is a no-op.
+ * space, which contains the Xen mappings.
*/
- hole_low = pgd_index(USER_LIMIT);
- hole_high = pgd_index(PAGE_OFFSET);
+ hole_low = pgd_index(GUARD_HOLE_BASE_ADDR);
+ hole_high = pgd_index(GUARD_HOLE_END_ADDR);
+#endif
nr = pgd_index(limit) + 1;
for (i = 0; i < nr; i++) {
@@ -856,7 +865,7 @@
* The init_mm pagetable is really pinned as soon as its created, but
* that's before we have page structures to store the bits. So do all
* the book-keeping now once struct pages for allocated pages are
- * initialized. This happens only after free_all_bootmem() is called.
+ * initialized. This happens only after memblock_free_all() is called.
*/
static void __init xen_after_bootmem(void)
{
@@ -1298,16 +1307,6 @@
this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
}
-static unsigned long xen_read_cr2(void)
-{
- return this_cpu_read(xen_vcpu)->arch.cr2;
-}
-
-unsigned long xen_read_cr2_direct(void)
-{
- return this_cpu_read(xen_vcpu_info.arch.cr2);
-}
-
static noinline void xen_flush_tlb(void)
{
struct mmuext_op *op;
@@ -2105,10 +2104,10 @@
pt = early_memremap(pt_phys, PAGE_SIZE);
clear_page(pt);
for (idx_pte = 0;
- idx_pte < min(n_pte, PTRS_PER_PTE);
- idx_pte++) {
- set_pte(pt + idx_pte,
- pfn_pte(p2m_pfn, PAGE_KERNEL));
+ idx_pte < min(n_pte, PTRS_PER_PTE);
+ idx_pte++) {
+ pt[idx_pte] = pfn_pte(p2m_pfn,
+ PAGE_KERNEL);
p2m_pfn++;
}
n_pte -= PTRS_PER_PTE;
@@ -2116,8 +2115,7 @@
make_lowmem_page_readonly(__va(pt_phys));
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
PFN_DOWN(pt_phys));
- set_pmd(pmd + idx_pt,
- __pmd(_PAGE_TABLE | pt_phys));
+ pmd[idx_pt] = __pmd(_PAGE_TABLE | pt_phys);
pt_phys += PAGE_SIZE;
}
n_pt -= PTRS_PER_PMD;
@@ -2125,7 +2123,7 @@
make_lowmem_page_readonly(__va(pmd_phys));
pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
PFN_DOWN(pmd_phys));
- set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+ pud[idx_pmd] = __pud(_PAGE_TABLE | pmd_phys);
pmd_phys += PAGE_SIZE;
}
n_pmd -= PTRS_PER_PUD;
@@ -2209,7 +2207,7 @@
set_page_prot(initial_page_table, PAGE_KERNEL);
set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
- pv_mmu_ops.write_cr3 = &xen_write_cr3;
+ pv_ops.mmu.write_cr3 = &xen_write_cr3;
}
/*
@@ -2310,8 +2308,6 @@
#elif defined(CONFIG_X86_VSYSCALL_EMULATION)
case VSYSCALL_PAGE:
#endif
- case FIX_TEXT_POKE0:
- case FIX_TEXT_POKE1:
/* All local page mappings */
pte = pfn_pte(phys, prot);
break;
@@ -2358,27 +2354,27 @@
static void __init xen_post_allocator_init(void)
{
- pv_mmu_ops.set_pte = xen_set_pte;
- pv_mmu_ops.set_pmd = xen_set_pmd;
- pv_mmu_ops.set_pud = xen_set_pud;
+ pv_ops.mmu.set_pte = xen_set_pte;
+ pv_ops.mmu.set_pmd = xen_set_pmd;
+ pv_ops.mmu.set_pud = xen_set_pud;
#ifdef CONFIG_X86_64
- pv_mmu_ops.set_p4d = xen_set_p4d;
+ pv_ops.mmu.set_p4d = xen_set_p4d;
#endif
/* This will work as long as patching hasn't happened yet
(which it hasn't) */
- pv_mmu_ops.alloc_pte = xen_alloc_pte;
- pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
- pv_mmu_ops.release_pte = xen_release_pte;
- pv_mmu_ops.release_pmd = xen_release_pmd;
+ pv_ops.mmu.alloc_pte = xen_alloc_pte;
+ pv_ops.mmu.alloc_pmd = xen_alloc_pmd;
+ pv_ops.mmu.release_pte = xen_release_pte;
+ pv_ops.mmu.release_pmd = xen_release_pmd;
#ifdef CONFIG_X86_64
- pv_mmu_ops.alloc_pud = xen_alloc_pud;
- pv_mmu_ops.release_pud = xen_release_pud;
+ pv_ops.mmu.alloc_pud = xen_alloc_pud;
+ pv_ops.mmu.release_pud = xen_release_pud;
#endif
- pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte);
+ pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte);
#ifdef CONFIG_X86_64
- pv_mmu_ops.write_cr3 = &xen_write_cr3;
+ pv_ops.mmu.write_cr3 = &xen_write_cr3;
#endif
}
@@ -2391,7 +2387,7 @@
}
static const struct pv_mmu_ops xen_mmu_ops __initconst = {
- .read_cr2 = xen_read_cr2,
+ .read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
.write_cr2 = xen_write_cr2,
.read_cr3 = xen_read_cr3,
@@ -2466,7 +2462,7 @@
x86_init.paging.pagetable_init = xen_pagetable_init;
x86_init.hyper.init_after_bootmem = xen_after_bootmem;
- pv_mmu_ops = xen_mmu_ops;
+ pv_ops.mmu = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE);
}
@@ -2629,7 +2625,6 @@
*dma_handle = virt_to_machine(vstart).maddr;
return success ? 0 : -ENOMEM;
}
-EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
{
@@ -2664,7 +2659,137 @@
spin_unlock_irqrestore(&xen_reservation_lock, flags);
}
-EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
+
+static noinline void xen_flush_tlb_all(void)
+{
+ struct mmuext_op *op;
+ struct multicall_space mcs;
+
+ preempt_disable();
+
+ mcs = xen_mc_entry(sizeof(*op));
+
+ op = mcs.args;
+ op->cmd = MMUEXT_TLB_FLUSH_ALL;
+ MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+ preempt_enable();
+}
+
+#define REMAP_BATCH_SIZE 16
+
+struct remap_data {
+ xen_pfn_t *pfn;
+ bool contiguous;
+ bool no_translate;
+ pgprot_t prot;
+ struct mmu_update *mmu_update;
+};
+
+static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
+{
+ struct remap_data *rmd = data;
+ pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
+
+ /*
+ * If we have a contiguous range, just update the pfn itself,
+ * else update pointer to be "next pfn".
+ */
+ if (rmd->contiguous)
+ (*rmd->pfn)++;
+ else
+ rmd->pfn++;
+
+ rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
+ rmd->mmu_update->ptr |= rmd->no_translate ?
+ MMU_PT_UPDATE_NO_TRANSLATE :
+ MMU_NORMAL_PT_UPDATE;
+ rmd->mmu_update->val = pte_val_ma(pte);
+ rmd->mmu_update++;
+
+ return 0;
+}
+
+int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
+ xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
+ unsigned int domid, bool no_translate, struct page **pages)
+{
+ int err = 0;
+ struct remap_data rmd;
+ struct mmu_update mmu_update[REMAP_BATCH_SIZE];
+ unsigned long range;
+ int mapped = 0;
+
+ BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
+
+ rmd.pfn = pfn;
+ rmd.prot = prot;
+ /*
+ * We use the err_ptr to indicate if there we are doing a contiguous
+ * mapping or a discontigious mapping.
+ */
+ rmd.contiguous = !err_ptr;
+ rmd.no_translate = no_translate;
+
+ while (nr) {
+ int index = 0;
+ int done = 0;
+ int batch = min(REMAP_BATCH_SIZE, nr);
+ int batch_left = batch;
+
+ range = (unsigned long)batch << PAGE_SHIFT;
+
+ rmd.mmu_update = mmu_update;
+ err = apply_to_page_range(vma->vm_mm, addr, range,
+ remap_area_pfn_pte_fn, &rmd);
+ if (err)
+ goto out;
+
+ /*
+ * We record the error for each page that gives an error, but
+ * continue mapping until the whole set is done
+ */
+ do {
+ int i;
+
+ err = HYPERVISOR_mmu_update(&mmu_update[index],
+ batch_left, &done, domid);
+
+ /*
+ * @err_ptr may be the same buffer as @gfn, so
+ * only clear it after each chunk of @gfn is
+ * used.
+ */
+ if (err_ptr) {
+ for (i = index; i < index + done; i++)
+ err_ptr[i] = 0;
+ }
+ if (err < 0) {
+ if (!err_ptr)
+ goto out;
+ err_ptr[i] = err;
+ done++; /* Skip failed frame. */
+ } else
+ mapped += done;
+ batch_left -= done;
+ index += done;
+ } while (batch_left);
+
+ nr -= batch;
+ addr += range;
+ if (err_ptr)
+ err_ptr += batch;
+ cond_resched();
+ }
+out:
+
+ xen_flush_tlb_all();
+
+ return err < 0 ? err : mapped;
+}
+EXPORT_SYMBOL_GPL(xen_remap_pfn);
#ifdef CONFIG_KEXEC_CORE
phys_addr_t paddr_vmcoreinfo_note(void)
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 2bce795..0705457 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -69,6 +69,11 @@
trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
+#if MC_DEBUG
+ memcpy(b->debug, b->entries,
+ b->mcidx * sizeof(struct multicall_entry));
+#endif
+
switch (b->mcidx) {
case 0:
/* no-op */
@@ -87,32 +92,34 @@
break;
default:
-#if MC_DEBUG
- memcpy(b->debug, b->entries,
- b->mcidx * sizeof(struct multicall_entry));
-#endif
-
if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
BUG();
for (i = 0; i < b->mcidx; i++)
if (b->entries[i].result < 0)
ret++;
+ }
+ if (WARN_ON(ret)) {
+ pr_err("%d of %d multicall(s) failed: cpu %d\n",
+ ret, b->mcidx, smp_processor_id());
+ for (i = 0; i < b->mcidx; i++) {
+ if (b->entries[i].result < 0) {
#if MC_DEBUG
- if (ret) {
- printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
- ret, smp_processor_id());
- dump_stack();
- for (i = 0; i < b->mcidx; i++) {
- printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n",
- i+1, b->mcidx,
+ pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pS\n",
+ i + 1,
b->debug[i].op,
b->debug[i].args[0],
b->entries[i].result,
b->caller[i]);
+#else
+ pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\n",
+ i + 1,
+ b->entries[i].op,
+ b->entries[i].args[0],
+ b->entries[i].result);
+#endif
}
}
-#endif
}
b->mcidx = 0;
@@ -126,8 +133,6 @@
b->cbidx = 0;
local_irq_restore(flags);
-
- WARN_ON(ret);
}
struct multicall_space __xen_mc_entry(size_t args)
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 159a897..0acba2c 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
/*
* Xen leaves the responsibility for maintaining p2m mappings to the
* guests themselves, but it must also access and update the p2m array
@@ -65,7 +67,7 @@
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -179,8 +181,15 @@
static void * __ref alloc_p2m_page(void)
{
- if (unlikely(!slab_is_available()))
- return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
+ if (unlikely(!slab_is_available())) {
+ void *ptr = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+
+ if (!ptr)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE);
+
+ return ptr;
+ }
return (void *)__get_free_page(GFP_KERNEL);
}
@@ -188,7 +197,7 @@
static void __ref free_p2m_page(void *p)
{
if (unlikely(!slab_is_available())) {
- free_bootmem((unsigned long)p, PAGE_SIZE);
+ memblock_free((unsigned long)p, PAGE_SIZE);
return;
}
@@ -654,8 +663,7 @@
/*
* The interface requires atomic updates on p2m elements.
- * xen_safe_write_ulong() is using __put_user which does an atomic
- * store via asm().
+ * xen_safe_write_ulong() is using an atomic store via asm().
*/
if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
return true;
@@ -809,9 +817,6 @@
{
struct dentry *d_xen = xen_init_debugfs();
- if (d_xen == NULL)
- return -ENOMEM;
-
d_mmu_debug = debugfs_create_dir("mmu", d_xen);
debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 37c6056..33293ce 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
/* Glue code to lib/swiotlb-xen.c */
#include <linux/dma-mapping.h>
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 184b369..96d7f7d 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -1,28 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+
/******************************************************************************
* platform-pci-unplug.c
*
* Xen platform PCI device driver
* Copyright (c) 2010, Citrix
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
*/
#include <linux/init.h>
#include <linux/io.h>
#include <linux/export.h>
+#include <xen/xen.h>
#include <xen/platform_pci.h>
#include "xen-ops.h"
@@ -30,7 +19,6 @@
#define XEN_PLATFORM_ERR_PROTOCOL -2
#define XEN_PLATFORM_ERR_BLACKLIST -3
-#ifdef CONFIG_XEN_PVHVM
/* store the value of xen_emul_unplug after the unplug is done */
static int xen_platform_pci_unplug;
static int xen_emul_unplug;
@@ -218,4 +206,3 @@
return 0;
}
early_param("xen_emul_unplug", parse_xen_emul_unplug);
-#endif
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 95997e6..e13b0b4 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -3,6 +3,7 @@
#include <linux/interrupt.h>
#include <asm/xen/hypercall.h>
+#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
@@ -90,6 +91,12 @@
k7_counters_mirrored = 0;
break;
}
+ } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+ amd_num_counters = F10H_NUM_COUNTERS;
+ amd_counters_base = MSR_K7_PERFCTR0;
+ amd_ctrls_base = MSR_K7_EVNTSEL0;
+ amd_msr_step = 1;
+ k7_counters_mirrored = 0;
} else {
uint32_t eax, ebx, ecx, edx;
@@ -285,7 +292,7 @@
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
if (is_amd_pmu_msr(msr)) {
if (!xen_amd_pmu_emulate(msr, val, 1))
*val = native_read_msr_safe(msr, err);
@@ -308,7 +315,7 @@
{
uint64_t val = ((uint64_t)high << 32) | low;
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
if (is_amd_pmu_msr(msr)) {
if (!xen_amd_pmu_emulate(msr, &val, 0))
*err = native_write_msr_safe(msr, low, high);
@@ -379,7 +386,7 @@
unsigned long long xen_read_pmc(int counter)
{
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return xen_amd_read_pmc(counter);
else
return xen_intel_read_pmc(counter);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 075ed47..548d1e0 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -12,6 +12,7 @@
#include <linux/memblock.h>
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>
+#include <linux/memory_hotplug.h>
#include <asm/elf.h>
#include <asm/vdso.h>
@@ -493,7 +494,7 @@
* The remap information (which mfn remap to which pfn) is contained in the
* to be remapped memory itself in a linked list anchored at xen_remap_mfn.
* This scheme allows to remap the different chunks in arbitrary order while
- * the resulting mapping will be independant from the order.
+ * the resulting mapping will be independent from the order.
*/
void __init xen_remap_memory(void)
{
@@ -589,6 +590,14 @@
if (type == E820_TYPE_RAM) {
start = PAGE_ALIGN(start);
end &= ~((phys_addr_t)PAGE_SIZE - 1);
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+ * Don't allow adding memory not in E820 map while booting the
+ * system. Once the balloon driver is up it will remove that
+ * restriction again.
+ */
+ max_mem_size = end;
+#endif
}
e820__range_add(start, end - start, type);
@@ -748,6 +757,10 @@
memmap.nr_entries = ARRAY_SIZE(xen_e820_table.entries);
set_xen_guest_handle(memmap.buffer, xen_e820_table.entries);
+#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON)
+ xen_saved_max_mem_size = max_mem_size;
+#endif
+
op = xen_initial_domain() ?
XENMEM_machine_memory_map :
XENMEM_memory_map;
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index e3b18ad..802ee5b 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -22,6 +22,7 @@
#include <linux/tick.h>
#include <linux/nmi.h>
#include <linux/cpuhotplug.h>
+#include <linux/stackprotector.h>
#include <asm/paravirt.h>
#include <asm/desc.h>
@@ -57,6 +58,7 @@
{
int cpu;
+ cr4_init();
cpu_init();
touch_softlockup_watchdog();
preempt_disable();
@@ -88,6 +90,7 @@
asmlinkage __visible void cpu_bringup_and_idle(void)
{
cpu_bringup();
+ boot_init_stack_canary();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
@@ -249,6 +252,7 @@
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
set_cpu_sibling_map(0);
@@ -359,7 +363,9 @@
{
int rc;
- common_cpu_up(cpu, idle);
+ rc = common_cpu_up(cpu, idle);
+ if (rc)
+ return rc;
xen_setup_runstate_info(cpu);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 717b484..6deb490 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -3,22 +3,17 @@
* Split spinlock implementation out into its own file, so it can be
* compiled in a FTRACE-compatible way.
*/
-#include <linux/kernel_stat.h>
+#include <linux/kernel.h>
#include <linux/spinlock.h>
-#include <linux/debugfs.h>
-#include <linux/log2.h>
-#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/atomic.h>
#include <asm/paravirt.h>
#include <asm/qspinlock.h>
-#include <xen/interface/xen.h>
#include <xen/events.h>
#include "xen-ops.h"
-#include "debugfs.h"
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(char *, irq_name);
@@ -73,11 +68,8 @@
int irq;
char *name;
- if (!xen_pvspin) {
- if (cpu == 0)
- static_branch_disable(&virt_spin_lock_key);
+ if (!xen_pvspin)
return;
- }
WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n",
cpu, per_cpu(lock_kicker_irq, cpu));
@@ -129,16 +121,18 @@
if (!xen_pvspin) {
printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
+ static_branch_disable(&virt_spin_lock_key);
return;
}
printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
__pv_init_lock_hash();
- pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
- pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
- pv_lock_ops.wait = xen_qlock_wait;
- pv_lock_ops.kick = xen_qlock_kick;
- pv_lock_ops.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen);
+ pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+ pv_ops.lock.queued_spin_unlock =
+ PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+ pv_ops.lock.wait = xen_qlock_wait;
+ pv_ops.lock.kick = xen_qlock_kick;
+ pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen);
}
static __init int xen_parse_nopvspin(char *arg)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c84f1e0..befbdd8 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -28,7 +28,7 @@
#include "xen-ops.h"
-/* Xen may fire a timer up to this many ns early */
+/* Minimum amount of time until next clock event fires */
#define TIMER_SLOP 100000
static u64 xen_sched_clock_offset __read_mostly;
@@ -212,7 +212,7 @@
return 0;
}
-static const struct clock_event_device xen_timerop_clockevent = {
+static struct clock_event_device xen_timerop_clockevent __ro_after_init = {
.name = "xen",
.features = CLOCK_EVT_FEAT_ONESHOT,
@@ -273,7 +273,7 @@
return ret;
}
-static const struct clock_event_device xen_vcpuop_clockevent = {
+static struct clock_event_device xen_vcpuop_clockevent __ro_after_init = {
.name = "xen",
.features = CLOCK_EVT_FEAT_ONESHOT,
@@ -361,8 +361,6 @@
{
int cpu;
- pvclock_resume();
-
if (xen_clockevent != &xen_vcpuop_clockevent)
return;
@@ -379,12 +377,15 @@
};
static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
+static u64 xen_clock_value_saved;
void xen_save_time_memory_area(void)
{
struct vcpu_register_time_memory_area t;
int ret;
+ xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset;
+
if (!xen_clock)
return;
@@ -404,7 +405,7 @@
int ret;
if (!xen_clock)
- return;
+ goto out;
t.addr.v = &xen_clock->pvti;
@@ -421,6 +422,11 @@
if (ret != 0)
pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
ret);
+
+out:
+ /* Need pvclock_resume() before using xen_clocksource_read(). */
+ pvclock_resume();
+ xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved;
}
static void xen_setup_vsyscall_time_info(void)
@@ -513,7 +519,7 @@
void __init xen_init_time_ops(void)
{
xen_sched_clock_offset = xen_clocksource_read();
- pv_time_ops = xen_time_ops;
+ pv_ops.time = xen_time_ops;
x86_init.timers.timer_init = xen_time_init;
x86_init.timers.setup_percpu_clockev = x86_init_noop;
@@ -555,7 +561,7 @@
}
xen_sched_clock_offset = xen_clocksource_read();
- pv_time_ops = xen_time_ops;
+ pv_ops.time = xen_time_ops;
x86_init.timers.setup_percpu_clockev = xen_time_init;
x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
@@ -564,3 +570,17 @@
x86_platform.set_wallclock = xen_set_wallclock;
}
#endif
+
+/* Kernel parameter to specify Xen timer slop */
+static int __init parse_xen_timer_slop(char *ptr)
+{
+ unsigned long slop = memparse(ptr, NULL);
+
+ xen_timerop_clockevent.min_delta_ns = slop;
+ xen_timerop_clockevent.min_delta_ticks = slop;
+ xen_vcpuop_clockevent.min_delta_ns = slop;
+ xen_vcpuop_clockevent.min_delta_ticks = slop;
+
+ return 0;
+}
+early_param("xen_timer_slop", parse_xen_timer_slop);
diff --git a/arch/x86/xen/vdso.h b/arch/x86/xen/vdso.h
index 861fedf..873c54c 100644
--- a/arch/x86/xen/vdso.h
+++ b/arch/x86/xen/vdso.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
/* Bit used for the pseudo-hwcap for non-negative segments. We use
bit 1 to avoid bugs in some versions of glibc when bit 0 is
used; the choice is otherwise arbitrary. */
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 8019edd..be104ee 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -10,6 +10,7 @@
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/frame.h>
+#include <asm/asm.h>
#include <linux/linkage.h>
@@ -135,3 +136,18 @@
FRAME_END
ret
ENDPROC(check_events)
+
+ENTRY(xen_read_cr2)
+ FRAME_BEGIN
+ _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
+ _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
+ FRAME_END
+ ret
+ ENDPROC(xen_read_cr2);
+
+ENTRY(xen_read_cr2_direct)
+ FRAME_BEGIN
+ _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+ FRAME_END
+ ret
+ ENDPROC(xen_read_cr2_direct);
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index c15db06..cd17777 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -126,10 +126,9 @@
.globl xen_iret_start_crit, xen_iret_end_crit
/*
- * This is called by xen_hypervisor_callback in entry.S when it sees
+ * This is called by xen_hypervisor_callback in entry_32.S when it sees
* that the EIP at the time of interrupt was between
- * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in
- * %eax so we can do a more refined determination of what to do.
+ * xen_iret_start_crit and xen_iret_end_crit.
*
* The stack format at this point is:
* ----------------
@@ -138,70 +137,46 @@
* eflags } outer exception info
* cs }
* eip }
- * ---------------- <- edi (copy dest)
+ * ----------------
* eax : outer eax if it hasn't been restored
* ----------------
- * eflags } nested exception info
- * cs } (no ss/esp because we're nested
- * eip } from the same ring)
- * orig_eax }<- esi (copy src)
- * - - - - - - - -
- * fs }
- * es }
- * ds } SAVE_ALL state
- * eax }
- * : :
- * ebx }<- esp
- * ----------------
+ * eflags }
+ * cs } nested exception info
+ * eip }
+ * return address : (into xen_hypervisor_callback)
*
- * In order to deliver the nested exception properly, we need to shift
- * everything from the return addr up to the error code so it sits
- * just under the outer exception info. This means that when we
- * handle the exception, we do it in the context of the outer
- * exception rather than starting a new one.
+ * In order to deliver the nested exception properly, we need to discard the
+ * nested exception frame such that when we handle the exception, we do it
+ * in the context of the outer exception rather than starting a new one.
*
- * The only caveat is that if the outer eax hasn't been restored yet
- * (ie, it's still on stack), we need to insert its value into the
- * SAVE_ALL state before going on, since it's usermode state which we
- * eventually need to restore.
+ * The only caveat is that if the outer eax hasn't been restored yet (i.e.
+ * it's still on stack), we need to restore its value here.
*/
ENTRY(xen_iret_crit_fixup)
/*
* Paranoia: Make sure we're really coming from kernel space.
* One could imagine a case where userspace jumps into the
* critical range address, but just before the CPU delivers a
- * GP, it decides to deliver an interrupt instead. Unlikely?
- * Definitely. Easy to avoid? Yes. The Intel documents
- * explicitly say that the reported EIP for a bad jump is the
- * jump instruction itself, not the destination, but some
- * virtual environments get this wrong.
+ * PF, it decides to deliver an interrupt instead. Unlikely?
+ * Definitely. Easy to avoid? Yes.
*/
- movl PT_CS(%esp), %ecx
- andl $SEGMENT_RPL_MASK, %ecx
- cmpl $USER_RPL, %ecx
- je 2f
-
- lea PT_ORIG_EAX(%esp), %esi
- lea PT_EFLAGS(%esp), %edi
+ testb $2, 2*4(%esp) /* nested CS */
+ jnz 2f
/*
* If eip is before iret_restore_end then stack
* hasn't been restored yet.
*/
- cmp $iret_restore_end, %eax
+ cmpl $iret_restore_end, 1*4(%esp)
jae 1f
- movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */
- movl %eax, PT_EAX(%esp)
+ movl 4*4(%esp), %eax /* load outer EAX */
+ ret $4*4 /* discard nested EIP, CS, and EFLAGS as
+ * well as the just restored EAX */
- lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */
+1:
+ ret $3*4 /* discard nested EIP, CS, and EFLAGS */
- /* set up the copy */
-1: std
- mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */
- rep movsl
- cld
-
- lea 4(%edi), %esp /* point esp to new frame */
-2: jmp xen_do_upcall
-
+2:
+ ret
+END(xen_iret_crit_fixup)
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 417b339..ebf610b 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -12,6 +12,7 @@
#include <asm/segment.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
+#include <asm/asm.h>
#include <xen/interface/xen.h>
@@ -24,13 +25,13 @@
pop %r11
jmp \name
END(xen_\name)
+_ASM_NOKPROBE(xen_\name)
.endm
xen_pv_trap divide_error
xen_pv_trap debug
xen_pv_trap xendebug
xen_pv_trap int3
-xen_pv_trap xenint3
xen_pv_trap xennmi
xen_pv_trap overflow
xen_pv_trap bounds
@@ -91,13 +92,15 @@
ENTRY(xen_sysret64)
/*
* We're already on the usermode stack at this point, but
- * still with the kernel gs, so we can easily switch back
+ * still with the kernel gs, so we can easily switch back.
+ *
+ * tss.sp2 is scratch space.
*/
- movq %rsp, PER_CPU_VAR(rsp_scratch)
+ movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq $__USER_DS
- pushq PER_CPU_VAR(rsp_scratch)
+ pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
pushq %r11
pushq $__USER_CS
pushq %rcx
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 5077ead..c1d8b90 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -40,13 +40,13 @@
#ifdef CONFIG_X86_64
/* Set up %gs.
*
- * The base of %gs always points to the bottom of the irqstack
- * union. If the stack protector canary is enabled, it is
- * located at %gs:40. Note that, on SMP, the boot cpu uses
- * init data section till per cpu areas are set up.
+ * The base of %gs always points to fixed_percpu_data. If the
+ * stack protector canary is enabled, it is located at %gs:40.
+ * Note that, on SMP, the boot cpu uses init data section until
+ * the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
- movq $INIT_PER_CPU_VAR(irq_stack_union),%rax
+ movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
cdq
wrmsr
#endif
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 0e60bd9..45a441c 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -122,9 +122,9 @@
void __init xen_init_apic(void);
#ifdef CONFIG_XEN_EFI
-extern void xen_efi_init(void);
+extern void xen_efi_init(struct boot_params *boot_params);
#else
-static inline void __init xen_efi_init(void)
+static inline void __init xen_efi_init(struct boot_params *boot_params)
{
}
#endif
@@ -134,6 +134,9 @@
__visible unsigned long xen_save_fl_direct(void);
__visible void xen_restore_fl_direct(unsigned long);
+__visible unsigned long xen_read_cr2(void);
+__visible unsigned long xen_read_cr2_direct(void);
+
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
__visible void xen_sysret32(void);
diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
deleted file mode 100644
index 58722a0..0000000
--- a/arch/x86/xen/xen-pvh.S
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
- .code32
- .text
-#define _pa(x) ((x) - __START_KERNEL_map)
-
-#include <linux/elfnote.h>
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/segment.h>
-#include <asm/asm.h>
-#include <asm/boot.h>
-#include <asm/processor-flags.h>
-#include <asm/msr.h>
-#include <xen/interface/elfnote.h>
-
- __HEAD
-
-/*
- * Entry point for PVH guests.
- *
- * Xen ABI specifies the following register state when we come here:
- *
- * - `ebx`: contains the physical memory address where the loader has placed
- * the boot start info structure.
- * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
- * - `cr4`: all bits are cleared.
- * - `cs `: must be a 32-bit read/execute code segment with a base of ‘0’
- * and a limit of ‘0xFFFFFFFF’. The selector value is unspecified.
- * - `ds`, `es`: must be a 32-bit read/write data segment with a base of
- * ‘0’ and a limit of ‘0xFFFFFFFF’. The selector values are all
- * unspecified.
- * - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit
- * of '0x67'.
- * - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared.
- * Bit 8 (TF) must be cleared. Other bits are all unspecified.
- *
- * All other processor registers and flag bits are unspecified. The OS is in
- * charge of setting up it's own stack, GDT and IDT.
- */
-
-#define PVH_GDT_ENTRY_CS 1
-#define PVH_GDT_ENTRY_DS 2
-#define PVH_GDT_ENTRY_CANARY 3
-#define PVH_CS_SEL (PVH_GDT_ENTRY_CS * 8)
-#define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8)
-#define PVH_CANARY_SEL (PVH_GDT_ENTRY_CANARY * 8)
-
-ENTRY(pvh_start_xen)
- cld
-
- lgdt (_pa(gdt))
-
- mov $PVH_DS_SEL,%eax
- mov %eax,%ds
- mov %eax,%es
- mov %eax,%ss
-
- /* Stash hvm_start_info. */
- mov $_pa(pvh_start_info), %edi
- mov %ebx, %esi
- mov _pa(pvh_start_info_sz), %ecx
- shr $2,%ecx
- rep
- movsl
-
- mov $_pa(early_stack_end), %esp
-
- /* Enable PAE mode. */
- mov %cr4, %eax
- orl $X86_CR4_PAE, %eax
- mov %eax, %cr4
-
-#ifdef CONFIG_X86_64
- /* Enable Long mode. */
- mov $MSR_EFER, %ecx
- rdmsr
- btsl $_EFER_LME, %eax
- wrmsr
-
- /* Enable pre-constructed page tables. */
- mov $_pa(init_top_pgt), %eax
- mov %eax, %cr3
- mov $(X86_CR0_PG | X86_CR0_PE), %eax
- mov %eax, %cr0
-
- /* Jump to 64-bit mode. */
- ljmp $PVH_CS_SEL, $_pa(1f)
-
- /* 64-bit entry point. */
- .code64
-1:
- /* Set base address in stack canary descriptor. */
- mov $MSR_GS_BASE,%ecx
- mov $_pa(canary), %eax
- xor %edx, %edx
- wrmsr
-
- call xen_prepare_pvh
-
- /* startup_64 expects boot_params in %rsi. */
- mov $_pa(pvh_bootparams), %rsi
- mov $_pa(startup_64), %rax
- jmp *%rax
-
-#else /* CONFIG_X86_64 */
-
- /* Set base address in stack canary descriptor. */
- movl $_pa(gdt_start),%eax
- movl $_pa(canary),%ecx
- movw %cx, (PVH_GDT_ENTRY_CANARY * 8) + 2(%eax)
- shrl $16, %ecx
- movb %cl, (PVH_GDT_ENTRY_CANARY * 8) + 4(%eax)
- movb %ch, (PVH_GDT_ENTRY_CANARY * 8) + 7(%eax)
-
- mov $PVH_CANARY_SEL,%eax
- mov %eax,%gs
-
- call mk_early_pgtbl_32
-
- mov $_pa(initial_page_table), %eax
- mov %eax, %cr3
-
- mov %cr0, %eax
- or $(X86_CR0_PG | X86_CR0_PE), %eax
- mov %eax, %cr0
-
- ljmp $PVH_CS_SEL, $1f
-1:
- call xen_prepare_pvh
- mov $_pa(pvh_bootparams), %esi
-
- /* startup_32 doesn't expect paging and PAE to be on. */
- ljmp $PVH_CS_SEL, $_pa(2f)
-2:
- mov %cr0, %eax
- and $~X86_CR0_PG, %eax
- mov %eax, %cr0
- mov %cr4, %eax
- and $~X86_CR4_PAE, %eax
- mov %eax, %cr4
-
- ljmp $PVH_CS_SEL, $_pa(startup_32)
-#endif
-END(pvh_start_xen)
-
- .section ".init.data","aw"
- .balign 8
-gdt:
- .word gdt_end - gdt_start
- .long _pa(gdt_start)
- .word 0
-gdt_start:
- .quad 0x0000000000000000 /* NULL descriptor */
-#ifdef CONFIG_X86_64
- .quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* PVH_CS_SEL */
-#else
- .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* PVH_CS_SEL */
-#endif
- .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* PVH_DS_SEL */
- .quad GDT_ENTRY(0x4090, 0, 0x18) /* PVH_CANARY_SEL */
-gdt_end:
-
- .balign 16
-canary:
- .fill 48, 1, 0
-
-early_stack:
- .fill BOOT_STACK_SIZE, 1, 0
-early_stack_end:
-
- ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
- _ASM_PTR (pvh_start_xen - __START_KERNEL_map))