Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/arm64/kernel/.gitignore b/arch/arm64/kernel/.gitignore
index c5f676c..bbb90f9 100644
--- a/arch/arm64/kernel/.gitignore
+++ b/arch/arm64/kernel/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
vmlinux.lds
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 478491f..bbaf0bc 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -3,8 +3,6 @@
# Makefile for the linux kernel.
#
-CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
-AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
CFLAGS_armv8_deprecated.o := -I$(src)
CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
@@ -13,15 +11,15 @@
# Object file lists.
obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
- entry-fpsimd.o process.o ptrace.o setup.o signal.o \
- sys.o stacktrace.o time.o traps.o io.o vdso.o \
- hyp-stub.o psci.o cpu_ops.o insn.o \
+ entry-common.o entry-fpsimd.o process.o ptrace.o \
+ setup.o signal.o sys.o stacktrace.o time.o traps.o \
+ io.o vdso.o hyp-stub.o psci.o cpu_ops.o insn.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
- syscall.o
+ syscall.o proton-pack.o
-extra-$(CONFIG_EFI) := efi-entry.o
+targets += efi-entry.o
OBJCOPYFLAGS := --prefix-symbols=__efistub_
$(obj)/%.stub.o: $(obj)/%.o FORCE
@@ -29,9 +27,7 @@
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
sys_compat.o
-ifneq ($(CONFIG_COMPAT_VDSO), y)
obj-$(CONFIG_COMPAT) += sigreturn32.o
-endif
obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
obj-$(CONFIG_MODULES) += module.o
@@ -61,8 +57,9 @@
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
-obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
+obj-$(CONFIG_ARM64_MTE) += mte.o
obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index 46ec402..cada0b8 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -24,12 +24,12 @@
#include <linux/of_fdt.h>
#include <linux/smp.h>
#include <linux/serial_core.h>
+#include <linux/pgtable.h>
#include <acpi/ghes.h>
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
-#include <asm/pgtable.h>
#include <asm/smp_plat.h>
int acpi_noirq = 1; /* skip ACPI IRQ initialization */
@@ -261,6 +261,94 @@
return __pgprot(PROT_DEVICE_nGnRnE);
}
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
+{
+ efi_memory_desc_t *md, *region = NULL;
+ pgprot_t prot;
+
+ if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP)))
+ return NULL;
+
+ for_each_efi_memory_desc(md) {
+ u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+
+ if (phys < md->phys_addr || phys >= end)
+ continue;
+
+ if (phys + size > end) {
+ pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n");
+ return NULL;
+ }
+ region = md;
+ break;
+ }
+
+ /*
+ * It is fine for AML to remap regions that are not represented in the
+ * EFI memory map at all, as it only describes normal memory, and MMIO
+ * regions that require a virtual mapping to make them accessible to
+ * the EFI runtime services.
+ */
+ prot = __pgprot(PROT_DEVICE_nGnRnE);
+ if (region) {
+ switch (region->type) {
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+ case EFI_PERSISTENT_MEMORY:
+ if (memblock_is_map_memory(phys) ||
+ !memblock_is_region_memory(phys, size)) {
+ pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys);
+ return NULL;
+ }
+ /*
+ * Mapping kernel memory is permitted if the region in
+ * question is covered by a single memblock with the
+ * NOMAP attribute set: this enables the use of ACPI
+ * table overrides passed via initramfs, which are
+ * reserved in memory using arch_reserve_mem_area()
+ * below. As this particular use case only requires
+ * read access, fall through to the R/O mapping case.
+ */
+ fallthrough;
+
+ case EFI_RUNTIME_SERVICES_CODE:
+ /*
+ * This would be unusual, but not problematic per se,
+ * as long as we take care not to create a writable
+ * mapping for executable code.
+ */
+ prot = PAGE_KERNEL_RO;
+ break;
+
+ case EFI_ACPI_RECLAIM_MEMORY:
+ /*
+ * ACPI reclaim memory is used to pass firmware tables
+ * and other data that is intended for consumption by
+ * the OS only, which may decide it wants to reclaim
+ * that memory and use it for something else. We never
+ * do that, but we usually add it to the linear map
+ * anyway, in which case we should use the existing
+ * mapping.
+ */
+ if (memblock_is_map_memory(phys))
+ return (void __iomem *)__phys_to_virt(phys);
+ fallthrough;
+
+ default:
+ if (region->attribute & EFI_MEMORY_WB)
+ prot = PAGE_KERNEL;
+ else if (region->attribute & EFI_MEMORY_WT)
+ prot = __pgprot(PROT_NORMAL_WT);
+ else if (region->attribute & EFI_MEMORY_WC)
+ prot = __pgprot(PROT_NORMAL_NC);
+ }
+ }
+ return __ioremap(phys, size, prot);
+}
+
/*
* Claim Synchronous External Aborts as a firmware first notification.
*
@@ -313,3 +401,8 @@
return err;
}
+
+void arch_reserve_mem_area(acpi_physical_address addr, size_t size)
+{
+ memblock_mark_nomap(addr, size);
+}
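
For context, the default case of acpi_os_ioremap() above maps EFI memory attributes onto arm64 page protections with a fixed precedence. A minimal standalone sketch of that precedence (efi_mem_attr_to_prot() is an illustrative helper, not part of this patch):

/*
 * Illustrative only: prefer cacheable write-back, then write-through,
 * then non-cacheable normal memory, falling back to strongly-ordered
 * device memory when no cacheability attribute is advertised.
 */
#include <linux/efi.h>
#include <linux/pgtable.h>

static pgprot_t efi_mem_attr_to_prot(u64 attribute)
{
	if (attribute & EFI_MEMORY_WB)
		return PAGE_KERNEL;
	if (attribute & EFI_MEMORY_WT)
		return __pgprot(PROT_NORMAL_WT);
	if (attribute & EFI_MEMORY_WC)
		return __pgprot(PROT_NORMAL_NC);
	return __pgprot(PROT_DEVICE_nGnRnE);
}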
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index bcb14d1..7364de0 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -203,7 +203,7 @@
}
static int emulation_proc_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
+ void *buffer, size_t *lenp,
loff_t *ppos)
{
int ret = 0;
@@ -618,7 +618,8 @@
};
/*
- * Invoked as late_initcall, since not needed before init spawned.
+ * Invoked as core_initcall, which guarantees that the instruction
+ * emulation is ready for userspace.
*/
static int __init armv8_deprecated_init(void)
{
@@ -629,7 +630,7 @@
register_insn_emulation(&cp15_barrier_ops);
if (IS_ENABLED(CONFIG_SETEND_EMULATION)) {
- if(system_supports_mixed_endian_el0())
+ if (system_supports_mixed_endian_el0())
register_insn_emulation(&setend_ops);
else
pr_info("setend instruction emulation is not supported on this system\n");
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 2146857..7d32fc9 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -34,12 +34,20 @@
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS_BASE, offsetof(struct task_struct, thread_info.scs_base));
+ DEFINE(TSK_TI_SCS_SP, offsetof(struct task_struct, thread_info.scs_sp));
+#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
DEFINE(TSK_STACK_CANARY, offsetof(struct task_struct, stack_canary));
#endif
BLANK();
DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context));
+#ifdef CONFIG_ARM64_PTR_AUTH
+ DEFINE(THREAD_KEYS_USER, offsetof(struct task_struct, thread.keys_user));
+ DEFINE(THREAD_KEYS_KERNEL, offsetof(struct task_struct, thread.keys_kernel));
+#endif
BLANK();
DEFINE(S_X0, offsetof(struct pt_regs, regs[0]));
DEFINE(S_X2, offsetof(struct pt_regs, regs[2]));
@@ -56,6 +64,7 @@
DEFINE(S_X24, offsetof(struct pt_regs, regs[24]));
DEFINE(S_X26, offsetof(struct pt_regs, regs[26]));
DEFINE(S_X28, offsetof(struct pt_regs, regs[28]));
+ DEFINE(S_FP, offsetof(struct pt_regs, regs[29]));
DEFINE(S_LR, offsetof(struct pt_regs, regs[30]));
DEFINE(S_SP, offsetof(struct pt_regs, sp));
DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
@@ -88,18 +97,17 @@
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
-#ifdef CONFIG_KVM_ARM_HOST
+#ifdef CONFIG_KVM
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
- DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
+ DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
DEFINE(CPU_APDBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDBKEYLO_EL1]));
DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1]));
- DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt));
#endif
@@ -127,5 +135,14 @@
DEFINE(SDEI_EVENT_INTREGS, offsetof(struct sdei_registered_event, interrupted_regs));
DEFINE(SDEI_EVENT_PRIORITY, offsetof(struct sdei_registered_event, priority));
#endif
+#ifdef CONFIG_ARM64_PTR_AUTH
+ DEFINE(PTRAUTH_USER_KEY_APIA, offsetof(struct ptrauth_keys_user, apia));
+ DEFINE(PTRAUTH_USER_KEY_APIB, offsetof(struct ptrauth_keys_user, apib));
+ DEFINE(PTRAUTH_USER_KEY_APDA, offsetof(struct ptrauth_keys_user, apda));
+ DEFINE(PTRAUTH_USER_KEY_APDB, offsetof(struct ptrauth_keys_user, apdb));
+ DEFINE(PTRAUTH_USER_KEY_APGA, offsetof(struct ptrauth_keys_user, apga));
+ DEFINE(PTRAUTH_KERNEL_KEY_APIA, offsetof(struct ptrauth_keys_kernel, apia));
+ BLANK();
+#endif
return 0;
}
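
Each DEFINE() above is emitted into the generated asm-offsets.h as a plain #define, which is how entry.S and the ptrauth macros index C structures by constant offset. A sketch of the equivalence being encoded, assuming CONFIG_ARM64_PTR_AUTH=y (check_ptrauth_offsets() is illustrative only):

#include <linux/build_bug.h>
#include <linux/sched.h>
#include <asm/asm-offsets.h>
#include <asm/pointer_auth.h>

static void __maybe_unused check_ptrauth_offsets(void)
{
	/* THREAD_KEYS_USER is the generated constant from the DEFINE() above */
	BUILD_BUG_ON(THREAD_KEYS_USER !=
		     offsetof(struct task_struct, thread.keys_user));
	BUILD_BUG_ON(PTRAUTH_USER_KEY_APIA !=
		     offsetof(struct ptrauth_keys_user, apia));
}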
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index 7fa6828..587543c 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -43,7 +43,7 @@
this_leaf->type = type;
}
-static int __init_cache_level(unsigned int cpu)
+int init_cache_level(unsigned int cpu)
{
unsigned int ctype, level, leaves, fw_level;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -78,7 +78,7 @@
return 0;
}
-static int __populate_cache_leaves(unsigned int cpu)
+int populate_cache_leaves(unsigned int cpu)
{
unsigned int level, idx;
enum cache_type type;
@@ -97,6 +97,3 @@
}
return 0;
}
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
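
Dropping the DEFINE_SMP_CALL_CACHE_FUNCTION() wrappers works because the generic cacheinfo code provides weak defaults that the now-global arch functions override at link time. Roughly, on the generic side (a sketch, not part of this patch):

#include <linux/cacheinfo.h>
#include <linux/errno.h>

/*
 * Weak fallbacks in the generic code; the strong init_cache_level()
 * and populate_cache_leaves() definitions above take precedence.
 */
int __weak init_cache_level(unsigned int cpu)
{
	return -ENOENT;
}

int __weak populate_cache_leaves(unsigned int cpu)
{
	return -ENOENT;
}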
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 6ea337d..37721eb 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -29,12 +29,16 @@
* branch to what would be the reset vector. It must be executed with the
* flat identity mapping.
*/
-ENTRY(__cpu_soft_restart)
+SYM_CODE_START(__cpu_soft_restart)
/* Clear sctlr_el1 flags. */
mrs x12, sctlr_el1
- ldr x13, =SCTLR_ELx_FLAGS
+ mov_q x13, SCTLR_ELx_FLAGS
bic x12, x12, x13
pre_disable_mmu_workaround
+ /*
+ * either disable EL1&0 translation regime or disable EL2&0 translation
+ * regime if HCR_EL2.E2H == 1
+ */
msr sctlr_el1, x12
isb
@@ -42,11 +46,11 @@
mov x0, #HVC_SOFT_RESTART
hvc #0 // no return
-1: mov x18, x1 // entry
+1: mov x8, x1 // entry
mov x0, x2 // arg0
mov x1, x3 // arg1
mov x2, x4 // arg2
- br x18
-ENDPROC(__cpu_soft_restart)
+ br x8
+SYM_CODE_END(__cpu_soft_restart)
.popsection
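
The x18-to-x8 change above keeps the trampoline clear of x18, which is reserved as the shadow call stack pointer when CONFIG_SHADOW_CALL_STACK=y; x8 is an ordinary caller-saved temporary under AAPCS64. The C-side prototype, roughly as declared in cpu-reset.h, shows why x1..x4 must be shuffled down before the branch:

/*
 * The five arguments arrive in x0..x4 per AAPCS64; "entry" (x1) becomes
 * the branch target, and arg0..arg2 move down into x0..x2 above.
 */
void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
			unsigned long arg0, unsigned long arg1,
			unsigned long arg2);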
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 1e16c4e..533559c 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -6,12 +6,12 @@
*/
#include <linux/arm-smccc.h>
-#include <linux/psci.h>
#include <linux/types.h>
#include <linux/cpu.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/cpufeature.h>
+#include <asm/kvm_asm.h>
#include <asm/smp_plat.h>
static bool __maybe_unused
@@ -106,407 +106,6 @@
sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
}
-atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
-
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-
-DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
-
-#ifdef CONFIG_KVM_INDIRECT_VECTORS
-extern char __smccc_workaround_1_smc_start[];
-extern char __smccc_workaround_1_smc_end[];
-
-static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
- const char *hyp_vecs_end)
-{
- void *dst = lm_alias(__bp_harden_hyp_vecs_start + slot * SZ_2K);
- int i;
-
- for (i = 0; i < SZ_2K; i += 0x80)
- memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
-
- __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
-}
-
-static void install_bp_hardening_cb(bp_hardening_cb_t fn,
- const char *hyp_vecs_start,
- const char *hyp_vecs_end)
-{
- static DEFINE_RAW_SPINLOCK(bp_lock);
- int cpu, slot = -1;
-
- /*
- * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if
- * we're a guest. Skip the hyp-vectors work.
- */
- if (!hyp_vecs_start) {
- __this_cpu_write(bp_hardening_data.fn, fn);
- return;
- }
-
- raw_spin_lock(&bp_lock);
- for_each_possible_cpu(cpu) {
- if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
- slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
- break;
- }
- }
-
- if (slot == -1) {
- slot = atomic_inc_return(&arm64_el2_vector_last_slot);
- BUG_ON(slot >= BP_HARDEN_EL2_SLOTS);
- __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
- }
-
- __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
- __this_cpu_write(bp_hardening_data.fn, fn);
- raw_spin_unlock(&bp_lock);
-}
-#else
-#define __smccc_workaround_1_smc_start NULL
-#define __smccc_workaround_1_smc_end NULL
-
-static void install_bp_hardening_cb(bp_hardening_cb_t fn,
- const char *hyp_vecs_start,
- const char *hyp_vecs_end)
-{
- __this_cpu_write(bp_hardening_data.fn, fn);
-}
-#endif /* CONFIG_KVM_INDIRECT_VECTORS */
-
-#include <uapi/linux/psci.h>
-#include <linux/arm-smccc.h>
-#include <linux/psci.h>
-
-static void call_smc_arch_workaround_1(void)
-{
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
-}
-
-static void call_hvc_arch_workaround_1(void)
-{
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
-}
-
-static void qcom_link_stack_sanitization(void)
-{
- u64 tmp;
-
- asm volatile("mov %0, x30 \n"
- ".rept 16 \n"
- "bl . + 4 \n"
- ".endr \n"
- "mov x30, %0 \n"
- : "=&r" (tmp));
-}
-
-static bool __nospectre_v2;
-static int __init parse_nospectre_v2(char *str)
-{
- __nospectre_v2 = true;
- return 0;
-}
-early_param("nospectre_v2", parse_nospectre_v2);
-
-/*
- * -1: No workaround
- * 0: No workaround required
- * 1: Workaround installed
- */
-static int detect_harden_bp_fw(void)
-{
- bp_hardening_cb_t cb;
- void *smccc_start, *smccc_end;
- struct arm_smccc_res res;
- u32 midr = read_cpuid_id();
-
- if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
- return -1;
-
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- switch ((int)res.a0) {
- case 1:
- /* Firmware says we're just fine */
- return 0;
- case 0:
- cb = call_hvc_arch_workaround_1;
- /* This is a guest, no need to patch KVM vectors */
- smccc_start = NULL;
- smccc_end = NULL;
- break;
- default:
- return -1;
- }
- break;
-
- case PSCI_CONDUIT_SMC:
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- switch ((int)res.a0) {
- case 1:
- /* Firmware says we're just fine */
- return 0;
- case 0:
- cb = call_smc_arch_workaround_1;
- smccc_start = __smccc_workaround_1_smc_start;
- smccc_end = __smccc_workaround_1_smc_end;
- break;
- default:
- return -1;
- }
- break;
-
- default:
- return -1;
- }
-
- if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
- ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
- cb = qcom_link_stack_sanitization;
-
- if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR))
- install_bp_hardening_cb(cb, smccc_start, smccc_end);
-
- return 1;
-}
-
-DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
-
-int ssbd_state __read_mostly = ARM64_SSBD_KERNEL;
-static bool __ssb_safe = true;
-
-static const struct ssbd_options {
- const char *str;
- int state;
-} ssbd_options[] = {
- { "force-on", ARM64_SSBD_FORCE_ENABLE, },
- { "force-off", ARM64_SSBD_FORCE_DISABLE, },
- { "kernel", ARM64_SSBD_KERNEL, },
-};
-
-static int __init ssbd_cfg(char *buf)
-{
- int i;
-
- if (!buf || !buf[0])
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(ssbd_options); i++) {
- int len = strlen(ssbd_options[i].str);
-
- if (strncmp(buf, ssbd_options[i].str, len))
- continue;
-
- ssbd_state = ssbd_options[i].state;
- return 0;
- }
-
- return -EINVAL;
-}
-early_param("ssbd", ssbd_cfg);
-
-void __init arm64_update_smccc_conduit(struct alt_instr *alt,
- __le32 *origptr, __le32 *updptr,
- int nr_inst)
-{
- u32 insn;
-
- BUG_ON(nr_inst != 1);
-
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
- insn = aarch64_insn_get_hvc_value();
- break;
- case PSCI_CONDUIT_SMC:
- insn = aarch64_insn_get_smc_value();
- break;
- default:
- return;
- }
-
- *updptr = cpu_to_le32(insn);
-}
-
-void __init arm64_enable_wa2_handling(struct alt_instr *alt,
- __le32 *origptr, __le32 *updptr,
- int nr_inst)
-{
- BUG_ON(nr_inst != 1);
- /*
- * Only allow mitigation on EL1 entry/exit and guest
- * ARCH_WORKAROUND_2 handling if the SSBD state allows it to
- * be flipped.
- */
- if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL)
- *updptr = cpu_to_le32(aarch64_insn_gen_nop());
-}
-
-void arm64_set_ssbd_mitigation(bool state)
-{
- if (!IS_ENABLED(CONFIG_ARM64_SSBD)) {
- pr_info_once("SSBD disabled by kernel configuration\n");
- return;
- }
-
- if (this_cpu_has_cap(ARM64_SSBS)) {
- if (state)
- asm volatile(SET_PSTATE_SSBS(0));
- else
- asm volatile(SET_PSTATE_SSBS(1));
- return;
- }
-
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
- break;
-
- case PSCI_CONDUIT_SMC:
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
- break;
-
- default:
- WARN_ON_ONCE(1);
- break;
- }
-}
-
-static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
- int scope)
-{
- struct arm_smccc_res res;
- bool required = true;
- s32 val;
- bool this_cpu_safe = false;
-
- WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
-
- if (cpu_mitigations_off())
- ssbd_state = ARM64_SSBD_FORCE_DISABLE;
-
- /* delay setting __ssb_safe until we get a firmware response */
- if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list))
- this_cpu_safe = true;
-
- if (this_cpu_has_cap(ARM64_SSBS)) {
- if (!this_cpu_safe)
- __ssb_safe = false;
- required = false;
- goto out_printmsg;
- }
-
- if (psci_ops.smccc_version == SMCCC_VERSION_1_0) {
- ssbd_state = ARM64_SSBD_UNKNOWN;
- if (!this_cpu_safe)
- __ssb_safe = false;
- return false;
- }
-
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_2, &res);
- break;
-
- case PSCI_CONDUIT_SMC:
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_2, &res);
- break;
-
- default:
- ssbd_state = ARM64_SSBD_UNKNOWN;
- if (!this_cpu_safe)
- __ssb_safe = false;
- return false;
- }
-
- val = (s32)res.a0;
-
- switch (val) {
- case SMCCC_RET_NOT_SUPPORTED:
- ssbd_state = ARM64_SSBD_UNKNOWN;
- if (!this_cpu_safe)
- __ssb_safe = false;
- return false;
-
- /* machines with mixed mitigation requirements must not return this */
- case SMCCC_RET_NOT_REQUIRED:
- pr_info_once("%s mitigation not required\n", entry->desc);
- ssbd_state = ARM64_SSBD_MITIGATED;
- return false;
-
- case SMCCC_RET_SUCCESS:
- __ssb_safe = false;
- required = true;
- break;
-
- case 1: /* Mitigation not required on this CPU */
- required = false;
- break;
-
- default:
- WARN_ON(1);
- if (!this_cpu_safe)
- __ssb_safe = false;
- return false;
- }
-
- switch (ssbd_state) {
- case ARM64_SSBD_FORCE_DISABLE:
- arm64_set_ssbd_mitigation(false);
- required = false;
- break;
-
- case ARM64_SSBD_KERNEL:
- if (required) {
- __this_cpu_write(arm64_ssbd_callback_required, 1);
- arm64_set_ssbd_mitigation(true);
- }
- break;
-
- case ARM64_SSBD_FORCE_ENABLE:
- arm64_set_ssbd_mitigation(true);
- required = true;
- break;
-
- default:
- WARN_ON(1);
- break;
- }
-
-out_printmsg:
- switch (ssbd_state) {
- case ARM64_SSBD_FORCE_DISABLE:
- pr_info_once("%s disabled from command-line\n", entry->desc);
- break;
-
- case ARM64_SSBD_FORCE_ENABLE:
- pr_info_once("%s forced from command-line\n", entry->desc);
- break;
- }
-
- return required;
-}
-
-static void cpu_enable_ssbd_mitigation(const struct arm64_cpu_capabilities *cap)
-{
- if (ssbd_state != ARM64_SSBD_FORCE_DISABLE)
- cap->matches(cap, SCOPE_LOCAL_CPU);
-}
-
-/* known invulnerable cores */
-static const struct midr_range arm64_ssb_cpus[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
- MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
- {},
-};
-
#ifdef CONFIG_ARM64_ERRATUM_1463225
DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
@@ -514,12 +113,7 @@
has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry,
int scope)
{
- u32 midr = read_cpuid_id();
- /* Cortex-A76 r0p0 - r3p1 */
- struct midr_range range = MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1);
-
- WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return is_midr_in_range(midr, &range) && is_kernel_in_hyp_mode();
+ return is_affected_midr_range_list(entry, scope) && is_kernel_in_hyp_mode();
}
#endif
@@ -566,87 +160,6 @@
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \
CAP_MIDR_RANGE_LIST(midr_list)
-/* Track overall mitigation state. We are only mitigated if all cores are ok */
-static bool __hardenbp_enab = true;
-static bool __spectrev2_safe = true;
-
-int get_spectre_v2_workaround_state(void)
-{
- if (__spectrev2_safe)
- return ARM64_BP_HARDEN_NOT_REQUIRED;
-
- if (!__hardenbp_enab)
- return ARM64_BP_HARDEN_UNKNOWN;
-
- return ARM64_BP_HARDEN_WA_NEEDED;
-}
-
-/*
- * List of CPUs that do not need any Spectre-v2 mitigation at all.
- */
-static const struct midr_range spectre_v2_safe_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
- MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
- MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
- { /* sentinel */ }
-};
-
-/*
- * Track overall bp hardening for all heterogeneous cores in the machine.
- * We are only considered "safe" if all booted cores are known safe.
- */
-static bool __maybe_unused
-check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope)
-{
- int need_wa;
-
- WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
-
- /* If the CPU has CSV2 set, we're safe */
- if (cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64PFR0_EL1),
- ID_AA64PFR0_CSV2_SHIFT))
- return false;
-
- /* Alternatively, we have a list of unaffected CPUs */
- if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list))
- return false;
-
- /* Fallback to firmware detection */
- need_wa = detect_harden_bp_fw();
- if (!need_wa)
- return false;
-
- __spectrev2_safe = false;
-
- if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) {
- pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n");
- __hardenbp_enab = false;
- return false;
- }
-
- /* forced off */
- if (__nospectre_v2 || cpu_mitigations_off()) {
- pr_info_once("spectrev2 mitigation disabled by command line option\n");
- __hardenbp_enab = false;
- return false;
- }
-
- if (need_wa < 0) {
- pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n");
- __hardenbp_enab = false;
- }
-
- return (need_wa > 0);
-}
-
-static void
-cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap)
-{
- cap->matches(cap, SCOPE_LOCAL_CPU);
-}
-
static const __maybe_unused struct midr_range tx2_family_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
@@ -683,9 +196,9 @@
return is_midr_in_range(midr, &range) && has_dic;
}
-#ifdef CONFIG_HARDEN_EL2_VECTORS
+#ifdef CONFIG_RANDOMIZE_BASE
-static const struct midr_range arm64_harden_el2_vectors[] = {
+static const struct midr_range ca57_a72[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
{},
@@ -774,6 +287,8 @@
MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1),
/* Neoverse-N1 r0p0 to r3p1 */
MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 0, 3, 1),
+ /* Kryo4xx Gold (rcpe to rfpf) => (r0p0 to r3p1) */
+ MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xf),
{},
};
#endif
@@ -784,6 +299,8 @@
MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
/* Brahma-B53 r0p[0] */
MIDR_REV(MIDR_BRAHMA_B53, 0, 0),
+ /* Kryo2XX Silver rAp4 */
+ MIDR_REV(MIDR_QCOM_KRYO_2XX_SILVER, 0xa, 0x4),
{},
};
#endif
@@ -805,10 +322,40 @@
};
#endif
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT
+static const struct midr_range erratum_speculative_at_list[] = {
+#ifdef CONFIG_ARM64_ERRATUM_1165522
+ /* Cortex A76 r0p0 to r2p0 */
+ MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1319367
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1530923
+ /* Cortex A55 r0p0 to r2p0 */
+ MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 2, 0),
+ /* Kryo4xx Silver (rdpe => r1p0) */
+ MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe),
+#endif
+ {},
+};
+#endif
+
+#ifdef CONFIG_ARM64_ERRATUM_1463225
+static const struct midr_range erratum_1463225[] = {
+ /* Cortex-A76 r0p0 - r3p1 */
+ MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1),
+ /* Kryo4xx Gold (rcpe to rfpf) => (r0p0 to r3p1) */
+ MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xf),
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
- .desc = "ARM errata 826319, 827319, 824069, 819472",
+ .desc = "ARM errata 826319, 827319, 824069, or 819472",
.capability = ARM64_WORKAROUND_CLEAN_CACHE,
ERRATA_MIDR_RANGE_LIST(workaround_clean_cache),
.cpu_enable = cpu_enable_cache_maint_trap,
@@ -890,7 +437,7 @@
#endif
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
{
- .desc = "Qualcomm erratum 1009, ARM erratum 1286807",
+ .desc = "Qualcomm erratum 1009, or ARM erratum 1286807",
.capability = ARM64_WORKAROUND_REPEAT_TLBI,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = cpucap_multi_entry_cap_matches,
@@ -906,26 +453,32 @@
},
#endif
{
- .desc = "Branch predictor hardening",
- .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ .desc = "Spectre-v2",
+ .capability = ARM64_SPECTRE_V2,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
- .matches = check_branch_predictor,
- .cpu_enable = cpu_enable_branch_predictor_hardening,
+ .matches = has_spectre_v2,
+ .cpu_enable = spectre_v2_enable_mitigation,
},
-#ifdef CONFIG_HARDEN_EL2_VECTORS
+#ifdef CONFIG_RANDOMIZE_BASE
{
.desc = "EL2 vector hardening",
.capability = ARM64_HARDEN_EL2_VECTORS,
- ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors),
+ ERRATA_MIDR_RANGE_LIST(ca57_a72),
},
#endif
{
- .desc = "Speculative Store Bypass Disable",
- .capability = ARM64_SSBD,
+ .desc = "Spectre-v4",
+ .capability = ARM64_SPECTRE_V4,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
- .matches = has_ssbd_mitigation,
- .cpu_enable = cpu_enable_ssbd_mitigation,
- .midr_range_list = arm64_ssb_cpus,
+ .matches = has_spectre_v4,
+ .cpu_enable = spectre_v4_enable_mitigation,
+ },
+ {
+ .desc = "Spectre-BHB",
+ .capability = ARM64_SPECTRE_BHB,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = is_spectre_bhb_affected,
+ .cpu_enable = spectre_bhb_enable_mitigation,
},
#ifdef CONFIG_ARM64_ERRATUM_1418040
{
@@ -940,12 +493,11 @@
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
},
#endif
-#ifdef CONFIG_ARM64_ERRATUM_1165522
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT
{
- /* Cortex-A76 r0p0 to r2p0 */
- .desc = "ARM erratum 1165522",
- .capability = ARM64_WORKAROUND_1165522,
- ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
+ .desc = "ARM errata 1165522, 1319367, or 1530923",
+ .capability = ARM64_WORKAROUND_SPECULATIVE_AT,
+ ERRATA_MIDR_RANGE_LIST(erratum_speculative_at_list),
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_1463225
@@ -954,6 +506,7 @@
.capability = ARM64_WORKAROUND_1463225,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = has_cortex_a76_erratum_1463225,
+ .midr_range_list = erratum_1463225,
},
#endif
#ifdef CONFIG_CAVIUM_TX2_ERRATUM_219
@@ -979,43 +532,16 @@
.cpu_enable = cpu_enable_trap_ctr_access,
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_1508412
+ {
+ /* we depend on the firmware portion for correctness */
+ .desc = "ARM erratum 1508412 (kernel portion)",
+ .capability = ARM64_WORKAROUND_1508412,
+ ERRATA_MIDR_RANGE(MIDR_CORTEX_A77,
+ 0, 0,
+ 1, 0),
+ },
+#endif
{
}
};
-
-ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
-}
-
-ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- switch (get_spectre_v2_workaround_state()) {
- case ARM64_BP_HARDEN_NOT_REQUIRED:
- return sprintf(buf, "Not affected\n");
- case ARM64_BP_HARDEN_WA_NEEDED:
- return sprintf(buf, "Mitigation: Branch predictor hardening\n");
- case ARM64_BP_HARDEN_UNKNOWN:
- default:
- return sprintf(buf, "Vulnerable\n");
- }
-}
-
-ssize_t cpu_show_spec_store_bypass(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- if (__ssb_safe)
- return sprintf(buf, "Not affected\n");
-
- switch (ssbd_state) {
- case ARM64_SSBD_KERNEL:
- case ARM64_SSBD_FORCE_ENABLE:
- if (IS_ENABLED(CONFIG_ARM64_SSBD))
- return sprintf(buf,
- "Mitigation: Speculative Store Bypass disabled via prctl\n");
- }
-
- return sprintf(buf, "Vulnerable\n");
-}
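
All of the Spectre-v2/SSBD machinery removed above was consolidated into the new proton-pack.c (added to the Makefile earlier in this patch). The firmware probe is unchanged in spirit; a hedged sketch of querying ARM_SMCCC_ARCH_WORKAROUND_1 support via the SMCCC 1.1 conduit (probe_smccc_wa1() is illustrative, not a proton-pack.c function name):

#include <linux/arm-smccc.h>

static bool probe_smccc_wa1(void)
{
	struct arm_smccc_res res;

	/* arm_smccc_1_1_invoke() selects the HVC or SMC conduit itself */
	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
			     ARM_SMCCC_ARCH_WORKAROUND_1, &res);

	/* 0 = implemented, 1 = not required, negative = unsupported */
	return (int)res.a0 >= 0;
}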
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index 7e07072..e133011 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -15,10 +15,12 @@
#include <asm/smp_plat.h>
extern const struct cpu_operations smp_spin_table_ops;
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
extern const struct cpu_operations acpi_parking_protocol_ops;
+#endif
extern const struct cpu_operations cpu_psci_ops;
-const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
+static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
&smp_spin_table_ops,
@@ -94,7 +96,7 @@
/*
* Read a cpu's enable method and record it in cpu_ops.
*/
-int __init cpu_read_ops(int cpu)
+int __init init_cpu_ops(int cpu)
{
const char *enable_method = cpu_read_enable_method(cpu);
@@ -109,3 +111,8 @@
return 0;
}
+
+const struct cpu_operations *get_cpu_ops(int cpu)
+{
+ return cpu_ops[cpu];
+}
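
With cpu_ops[] now static, external users go through the get_cpu_ops() accessor. A typical caller after the conversion (try_boot_cpu() is a sketch, not from this patch):

#include <linux/errno.h>
#include <asm/cpu_ops.h>

static int try_boot_cpu(int cpu)
{
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	/* a backend may not implement every method */
	if (!ops || !ops->cpu_boot)
		return -EOPNOTSUPP;

	return ops->cpu_boot(cpu);
}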
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index acdef8d..c9108ed 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -3,6 +3,61 @@
* Contains CPU feature definitions
*
* Copyright (C) 2015 ARM Ltd.
+ *
+ * A note for the weary kernel hacker: the code here is confusing and hard to
+ * follow! That's partly because it's solving a nasty problem, but also because
+ * there's a little bit of over-abstraction that tends to obscure what's going
+ * on behind a maze of helper functions and macros.
+ *
+ * The basic problem is that hardware folks have started gluing together CPUs
+ * with distinct architectural features; in some cases even creating SoCs where
+ * user-visible instructions are available only on a subset of the available
+ * cores. We try to address this by snapshotting the feature registers of the
+ * boot CPU and comparing these with the feature registers of each secondary
+ * CPU when bringing them up. If there is a mismatch, then we update the
+ * snapshot state to indicate the lowest-common denominator of the feature,
+ * known as the "safe" value. This snapshot state can be queried to view the
+ * "sanitised" value of a feature register.
+ *
+ * The sanitised register values are used to decide which capabilities we
+ * have in the system. These may be in the form of traditional "hwcaps"
+ * advertised to userspace or internal "cpucaps" which are used to configure
+ * things like alternative patching and static keys. While a feature mismatch
+ * may result in a TAINT_CPU_OUT_OF_SPEC kernel taint, a capability mismatch
+ * may prevent a CPU from being onlined at all.
+ *
+ * Some implementation details worth remembering:
+ *
+ * - Mismatched features are *always* sanitised to a "safe" value, which
+ * usually indicates that the feature is not supported.
+ *
+ * - A mismatched feature marked with FTR_STRICT will cause a "SANITY CHECK"
+ * warning when onlining an offending CPU and the kernel will be tainted
+ * with TAINT_CPU_OUT_OF_SPEC.
+ *
+ * - Features marked as FTR_VISIBLE have their sanitised value visible to
+ * userspace. FTR_VISIBLE features in registers that are only visible
+ * to EL0 by trapping *must* have a corresponding HWCAP so that late
+ * onlining of CPUs cannot lead to features disappearing at runtime.
+ *
+ * - A "feature" is typically a 4-bit register field. A "capability" is the
+ * high-level description derived from the sanitised field value.
+ *
+ * - Read the Arm ARM (DDI 0487F.a) section D13.1.3 ("Principles of the ID
+ * scheme for fields in ID registers") to understand when feature fields
+ * may be signed or unsigned (FTR_SIGNED and FTR_UNSIGNED accordingly).
+ *
+ * - KVM exposes its own view of the feature registers to guest operating
+ * systems regardless of FTR_VISIBLE. This is typically driven from the
+ * sanitised register values to allow virtual CPUs to be migrated between
+ * arbitrary physical CPUs, but some features not present on the host are
+ * also advertised and emulated. Look at sys_reg_descs[] for the gory
+ * details.
+ *
+ * - If the arm64_ftr_bits[] for a register has a missing field, then this
+ * field is treated as STRICT RES0, including for read_sanitised_ftr_reg().
+ * This is stronger than FTR_HIDDEN and can be used to hide features from
+ * KVM guests.
*/
#define pr_fmt(fmt) "CPU features: " fmt
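
The lowest-common-denominator sanitisation described in the comment above reduces, for an unsigned FTR_LOWER_SAFE field, to taking the minimum across CPUs. A toy model (not kernel code) of that one rule:

#include <linux/types.h>

/*
 * Extract a 4-bit unsigned feature field and keep the smaller value:
 * a capability is only advertised if every CPU implements it.
 */
static inline unsigned int ftr_field(u64 reg, unsigned int shift)
{
	return (reg >> shift) & 0xf;
}

static unsigned int sanitise_lower_safe(u64 boot_reg, u64 new_reg,
					unsigned int shift)
{
	unsigned int cur = ftr_field(boot_reg, shift);
	unsigned int new = ftr_field(new_reg, shift);

	return new < cur ? new : cur;
}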
@@ -10,19 +65,23 @@
#include <linux/bsearch.h>
#include <linux/cpumask.h>
#include <linux/crash_dump.h>
+#include <linux/percpu.h>
#include <linux/sort.h>
#include <linux/stop_machine.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/cpu.h>
+
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/fpsimd.h>
#include <asm/mmu_context.h>
+#include <asm/mte.h>
#include <asm/processor.h>
#include <asm/sysreg.h>
#include <asm/traps.h>
+#include <asm/vectors.h>
#include <asm/virt.h>
/* Kernel representation of AT_HWCAP and AT_HWCAP2 */
@@ -45,40 +104,33 @@
/* Need also bit for ARM64_CB_PATCH */
DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
+bool arm64_use_ng_mappings = false;
+EXPORT_SYMBOL(arm64_use_ng_mappings);
+
+DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors;
+
/*
* Flag to indicate if we have computed the system wide
* capabilities based on the boot time active CPUs. This
* will be used to determine if a new booting CPU should
* go through the verification process to make sure that it
* supports the system capabilities, without using a hotplug
- * notifier.
+ * notifier. This is also used to decide if we could use
+ * the fast path for checking constant CPU caps.
*/
-static bool sys_caps_initialised;
-
-static inline void set_sys_caps_initialised(void)
+DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
+EXPORT_SYMBOL(arm64_const_caps_ready);
+static inline void finalize_system_capabilities(void)
{
- sys_caps_initialised = true;
+ static_branch_enable(&arm64_const_caps_ready);
}
-static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p)
+void dump_cpu_features(void)
{
/* file-wide pr_fmt adds "CPU features: " prefix */
pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps);
- return 0;
}
-static struct notifier_block cpu_hwcaps_notifier = {
- .notifier_call = dump_cpu_hwcaps
-};
-
-static int __init register_cpu_hwcaps_dumper(void)
-{
- atomic_notifier_chain_register(&panic_notifier_list,
- &cpu_hwcaps_notifier);
- return 0;
-}
-__initcall(register_cpu_hwcaps_dumper);
-
DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcap_keys);
@@ -112,11 +164,15 @@
static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap);
+static bool __system_matches_cap(unsigned int n);
+
/*
* NOTE: Any changes to the visibility of features should be kept in
* sync with the documentation of the CPU feature register ABI.
*/
static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RNDR_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TLB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),
@@ -133,6 +189,10 @@
};
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_I8MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DGH_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_BF16_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SPECRES_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
@@ -143,17 +203,26 @@
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_API_SHIFT, 4, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_AMU_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_MPAM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SEL2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0),
@@ -168,16 +237,30 @@
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MTE_SHIFT, 4, ID_AA64PFR1_MTE_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F64MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F32MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_I8MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BF16_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
@@ -187,6 +270,27 @@
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0),
+ /*
+ * Page size not being supported at Stage-2 is not fatal. You
+ * just give up KVM if PAGE_SIZE isn't supported there. Go fix
+ * your favourite nesting hypervisor.
+ *
+ * There is a small corner case where the hypervisor explicitly
+ * advertises a given granule size at Stage-2 (value 2) on some
+ * vCPUs, and uses the fallback to Stage-1 (value 0) for other
+ * vCPUs. Although this is not forbidden by the architecture, it
+ * indicates that the hypervisor is being silly (or buggy).
+ *
+ * We make no effort to cope with this and pretend that if these
+ * fields are inconsistent across vCPUs, then it isn't worth
+ * trying to bring KVM up.
+ */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_2_SHIFT, 4, 1),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_2_SHIFT, 4, 1),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_2_SHIFT, 4, 1),
/*
* We already refuse to boot CPUs that don't support our configured
* page size, so we can only detect mismatches for a page size other
@@ -212,6 +316,11 @@
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_AFP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_XNX_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_SPECSEI_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
@@ -222,10 +331,18 @@
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_BBM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_TTL_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IDS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_ST_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_NV_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CCIDX_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
@@ -244,7 +361,7 @@
* make use of *minLine.
* If we have differing I-cache policies, report it as the weakest - VIPT.
*/
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_L1IP_SHIFT, 2, ICACHE_POLICY_VIPT), /* L1Ip */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -255,19 +372,19 @@
};
static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0xf), /* InnerShr */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), /* FCSE */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), /* TCM */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* ShareLvl */
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0xf), /* OuterShr */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* PMSA */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* VMSA */
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_INNERSHR_SHIFT, 4, 0xf),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_FCSE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_MMFR0_AUXREG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_TCM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_SHARELVL_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_OUTERSHR_SHIFT, 4, 0xf),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_PMSA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_VMSA_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 36, 28, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_DOUBLELOCK_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
@@ -282,17 +399,27 @@
};
static const struct arm64_ftr_bits ftr_mvfr2[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* FPMisc */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* SIMDMisc */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_FPMISC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_SIMDMISC_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_dczid[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_DZP_SHIFT, 1, 1),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_BS_SHIFT, 4, 0),
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_isar0[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_COPROC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_CMPBRANCH_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_BITFIELD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_BITCOUNT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_SWAP_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
static const struct arm64_ftr_bits ftr_id_isar5[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0),
@@ -305,27 +432,94 @@
};
static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* ac2 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CCIDX_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_LSM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_HPDS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CNP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_XNX_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_AC2_SHIFT, 4, 0),
+
+ /*
+ * SpecSEI = 1 indicates that the PE might generate an SError on an
+ * external abort on speculative read. It is safe to assume that an
+ * SError might be generated than it will not be. Hence it has been
+ * classified as FTR_HIGHER_SAFE.
+ */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_MMFR4_SPECSEI_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_isar4[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SWP_FRAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_PSR_M_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_BARRIER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SMC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_WRITEBACK_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_WITHSHIFTS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_UNPRIV_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_mmfr5[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR5_ETS_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_isar6[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_I8MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_BF16_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_SPECRES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_SB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_FHM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_DP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_JSCVT_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_pfr0[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* State1 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* State0 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_DIT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_CSV2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE0_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_pfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_GIC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_VIRT_FRAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_SEC_FRAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_GENTIMER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_VIRTUALIZATION_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_MPROGMOD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_SECURITY_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_PROGMOD_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_pfr2[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_SSBS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_CSV3_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_dfr0[] = {
/* [31:28] TraceFilt */
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_PERFMON_SHIFT, 4, 0xf),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPDBG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPSDBG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPDBG_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_dfr1[] = {
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR1_MTPMU_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -339,7 +533,7 @@
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
* 0. Covers the following 32bit registers:
- * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
+ * id_isar[1-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
*/
static const struct arm64_ftr_bits ftr_generic_32bits[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
@@ -377,7 +571,7 @@
/* Op1 = 0, CRn = 0, CRm = 1 */
ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
- ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_id_pfr1),
ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0),
ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0),
ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits),
@@ -385,18 +579,22 @@
ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits),
/* Op1 = 0, CRn = 0, CRm = 2 */
- ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_id_isar0),
ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits),
- ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_id_isar4),
ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5),
ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4),
+ ARM64_FTR_REG(SYS_ID_ISAR6_EL1, ftr_id_isar6),
/* Op1 = 0, CRn = 0, CRm = 3 */
ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2),
+ ARM64_FTR_REG(SYS_ID_PFR2_EL1, ftr_id_pfr2),
+ ARM64_FTR_REG(SYS_ID_DFR1_EL1, ftr_id_dfr1),
+ ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5),
/* Op1 = 0, CRn = 0, CRm = 4 */
ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
@@ -410,6 +608,7 @@
/* Op1 = 0, CRn = 0, CRm = 6 */
ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1),
+ ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2),
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
@@ -433,16 +632,16 @@
}
/*
- * get_arm64_ftr_reg - Lookup a feature register entry using its
- * sys_reg() encoding. With the array arm64_ftr_regs sorted in the
- * ascending order of sys_id , we use binary search to find a matching
+ * get_arm64_ftr_reg_nowarn - Looks up a feature register entry using
+ * its sys_reg() encoding. With the array arm64_ftr_regs sorted in the
+ * ascending order of sys_id, we use binary search to find a matching
* entry.
*
* returns - Upon success, matching ftr_reg entry for id.
 *            - NULL on failure. It is up to the caller to decide
* the impact of a failure.
*/
-static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
+static struct arm64_ftr_reg *get_arm64_ftr_reg_nowarn(u32 sys_id)
{
const struct __ftr_reg_entry *ret;
@@ -456,6 +655,27 @@
return NULL;
}
+/*
+ * get_arm64_ftr_reg - Looks up a feature register entry using
+ * its sys_reg() encoding. This calls get_arm64_ftr_reg_nowarn().
+ *
+ * returns - Upon success, matching ftr_reg entry for id.
+ *            - NULL on failure, but with a WARN_ON().
+ */
+static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
+{
+ struct arm64_ftr_reg *reg;
+
+ reg = get_arm64_ftr_reg_nowarn(sys_id);
+
+ /*
+	 * Requesting a non-existent register is an error. Warn
+ * and let the caller handle it.
+ */
+ WARN_ON(!reg);
+ return reg;
+}
+
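
For reference, the sorted-table lookup that both helpers rely on is a plain binary search keyed on sys_id. A minimal sketch, assuming only that arm64_ftr_regs is sorted in ascending sys_id order (the in-tree code uses the generic bsearch() helper with a comparator; ftr_reg_bsearch is a hypothetical name):

	static const struct __ftr_reg_entry *
	ftr_reg_bsearch(const struct __ftr_reg_entry *tbl, size_t n, u32 sys_id)
	{
		size_t lo = 0, hi = n;

		while (lo < hi) {
			size_t mid = lo + (hi - lo) / 2;

			if (tbl[mid].sys_id == sys_id)
				return &tbl[mid];	/* exact match */
			if (tbl[mid].sys_id < sys_id)
				lo = mid + 1;		/* continue in upper half */
			else
				hi = mid;		/* continue in lower half */
		}
		return NULL;	/* miss: the caller decides how to react */
	}
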
static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg,
s64 ftr_val)
{
@@ -481,7 +701,7 @@
case FTR_HIGHER_OR_ZERO_SAFE:
if (!cur || !new)
break;
- /* Fallthrough */
+ fallthrough;
case FTR_HIGHER_SAFE:
ret = new > cur ? new : cur;
break;
@@ -494,11 +714,52 @@
static void __init sort_ftr_regs(void)
{
- int i;
+ unsigned int i;
- /* Check that the array is sorted so that we can do the binary search */
- for (i = 1; i < ARRAY_SIZE(arm64_ftr_regs); i++)
+ for (i = 0; i < ARRAY_SIZE(arm64_ftr_regs); i++) {
+ const struct arm64_ftr_reg *ftr_reg = arm64_ftr_regs[i].reg;
+ const struct arm64_ftr_bits *ftr_bits = ftr_reg->ftr_bits;
+ unsigned int j = 0;
+
+ /*
+ * Features here must be sorted in descending order with respect
+ * to their shift values and should not overlap with each other.
+ */
+ for (; ftr_bits->width != 0; ftr_bits++, j++) {
+ unsigned int width = ftr_reg->ftr_bits[j].width;
+ unsigned int shift = ftr_reg->ftr_bits[j].shift;
+ unsigned int prev_shift;
+
+ WARN((shift + width) > 64,
+ "%s has invalid feature at shift %d\n",
+ ftr_reg->name, shift);
+
+ /*
+ * Skip the first feature. There is nothing to
+ * compare against for now.
+ */
+ if (j == 0)
+ continue;
+
+ prev_shift = ftr_reg->ftr_bits[j - 1].shift;
+ WARN((shift + width) > prev_shift,
+ "%s has feature overlap at shift %d\n",
+ ftr_reg->name, shift);
+ }
+
+ /*
+ * Skip the first register. There is nothing to
+ * compare against for now.
+ */
+ if (i == 0)
+ continue;
+ /*
+ * Registers here must be sorted in ascending order with respect
+ * to sys_id for subsequent binary search in get_arm64_ftr_reg()
+ * to work correctly.
+ */
BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id);
+ }
}
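
To make the overlap check concrete with hypothetical values: with fields sorted by descending shift, a 4-bit field at shift 28 followed by a 4-bit field at shift 24 passes, since 24 + 4 <= 28 (the two fields abut exactly); a 4-bit field at shift 26 in the second slot would trip the WARN, since 26 + 4 = 30 > 28.
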
/*
@@ -517,9 +778,10 @@
const struct arm64_ftr_bits *ftrp;
struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
- BUG_ON(!reg);
+ if (!reg)
+ return;
- for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
+ for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
u64 ftr_mask = arm64_ftr_mask(ftrp);
s64 ftr_new = arm64_ftr_value(ftrp, new);
@@ -581,6 +843,7 @@
init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1);
init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
+ init_cpu_ftr_reg(SYS_ID_AA64ISAR2_EL1, info->reg_id_aa64isar2);
init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
@@ -590,18 +853,23 @@
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+ init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1);
init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+ init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6);
init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+ init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4);
+ init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5);
init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+ init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2);
init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
@@ -646,7 +914,9 @@
{
struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id);
- BUG_ON(!regp);
+ if (!regp)
+ return 0;
+
update_cpu_ftr_reg(regp, val);
if ((boot & regp->strict_mask) == (val & regp->strict_mask))
return 0;
@@ -655,6 +925,104 @@
return 1;
}
+static void relax_cpu_ftr_reg(u32 sys_id, int field)
+{
+ const struct arm64_ftr_bits *ftrp;
+ struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id);
+
+ if (!regp)
+ return;
+
+ for (ftrp = regp->ftr_bits; ftrp->width; ftrp++) {
+ if (ftrp->shift == field) {
+ regp->strict_mask &= ~arm64_ftr_mask(ftrp);
+ break;
+ }
+ }
+
+ /* Bogus field? */
+ WARN_ON(!ftrp->width);
+}
+
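
The bits cleared out of strict_mask form the usual contiguous bitfield mask. A sketch of the computation, assuming GENMASK_ULL() from <linux/bits.h>; this mirrors what arm64_ftr_mask() evaluates to:

	/* Mask covering `width` bits starting at `shift`;
	 * e.g. shift = 4, width = 4 yields 0xf0. */
	static inline u64 ftr_field_mask(unsigned int shift, unsigned int width)
	{
		return GENMASK_ULL(shift + width - 1, shift);
	}
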
+static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info,
+ struct cpuinfo_arm64 *boot)
+{
+ int taint = 0;
+ u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ /*
+ * If we don't have AArch32 at all then skip the checks entirely
+ * as the register values may be UNKNOWN and we're not going to be
+ * using them for anything.
+ */
+ if (!id_aa64pfr0_32bit_el0(pfr0))
+ return taint;
+
+ /*
+ * If we don't have AArch32 at EL1, then relax the strictness of
+ * EL1-dependent register fields to avoid spurious sanity check fails.
+ */
+ if (!id_aa64pfr0_32bit_el1(pfr0)) {
+ relax_cpu_ftr_reg(SYS_ID_ISAR4_EL1, ID_ISAR4_SMC_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRT_FRAC_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SEC_FRAC_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRTUALIZATION_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SECURITY_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_PROGMOD_SHIFT);
+ }
+
+ taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
+ info->reg_id_dfr0, boot->reg_id_dfr0);
+ taint |= check_update_ftr_reg(SYS_ID_DFR1_EL1, cpu,
+ info->reg_id_dfr1, boot->reg_id_dfr1);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
+ info->reg_id_isar0, boot->reg_id_isar0);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
+ info->reg_id_isar1, boot->reg_id_isar1);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
+ info->reg_id_isar2, boot->reg_id_isar2);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
+ info->reg_id_isar3, boot->reg_id_isar3);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
+ info->reg_id_isar4, boot->reg_id_isar4);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
+ info->reg_id_isar5, boot->reg_id_isar5);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR6_EL1, cpu,
+ info->reg_id_isar6, boot->reg_id_isar6);
+
+ /*
+ * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
+ * ACTLR formats could differ across CPUs and therefore would have to
+ * be trapped for virtualization anyway.
+ */
+ taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
+ info->reg_id_mmfr0, boot->reg_id_mmfr0);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
+ info->reg_id_mmfr1, boot->reg_id_mmfr1);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
+ info->reg_id_mmfr2, boot->reg_id_mmfr2);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
+ info->reg_id_mmfr3, boot->reg_id_mmfr3);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR4_EL1, cpu,
+ info->reg_id_mmfr4, boot->reg_id_mmfr4);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR5_EL1, cpu,
+ info->reg_id_mmfr5, boot->reg_id_mmfr5);
+ taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
+ info->reg_id_pfr0, boot->reg_id_pfr0);
+ taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
+ info->reg_id_pfr1, boot->reg_id_pfr1);
+ taint |= check_update_ftr_reg(SYS_ID_PFR2_EL1, cpu,
+ info->reg_id_pfr2, boot->reg_id_pfr2);
+ taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
+ info->reg_mvfr0, boot->reg_mvfr0);
+ taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
+ info->reg_mvfr1, boot->reg_mvfr1);
+ taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
+ info->reg_mvfr2, boot->reg_mvfr2);
+
+ return taint;
+}
+
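
Note how the taint value accumulates: check_update_ftr_reg() returns nonzero when a strict field differs from the boot CPU's sanitised value, so OR-ing the results means a single mismatching register is enough for update_32bit_cpu_features() to report a conflict, which the caller ultimately turns into a TAINT_CPU_OUT_OF_SPEC taint.
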
/*
* Update system wide CPU feature registers with the values from a
* non-boot CPU. Also performs SANITY checks to make sure that there
@@ -704,6 +1072,8 @@
info->reg_id_aa64isar0, boot->reg_id_aa64isar0);
taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu,
info->reg_id_aa64isar1, boot->reg_id_aa64isar1);
+ taint |= check_update_ftr_reg(SYS_ID_AA64ISAR2_EL1, cpu,
+ info->reg_id_aa64isar2, boot->reg_id_aa64isar2);
/*
* Differing PARange support is fine as long as all peripherals and
@@ -725,64 +1095,23 @@
taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,
info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
- /*
- * If we have AArch32, we care about 32-bit features for compat.
- * If the system doesn't support AArch32, don't update them.
- */
- if (id_aa64pfr0_32bit_el0(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
- id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
-
- taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
- info->reg_id_dfr0, boot->reg_id_dfr0);
- taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
- info->reg_id_isar0, boot->reg_id_isar0);
- taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
- info->reg_id_isar1, boot->reg_id_isar1);
- taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
- info->reg_id_isar2, boot->reg_id_isar2);
- taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
- info->reg_id_isar3, boot->reg_id_isar3);
- taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
- info->reg_id_isar4, boot->reg_id_isar4);
- taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
- info->reg_id_isar5, boot->reg_id_isar5);
-
- /*
- * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
- * ACTLR formats could differ across CPUs and therefore would have to
- * be trapped for virtualization anyway.
- */
- taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
- info->reg_id_mmfr0, boot->reg_id_mmfr0);
- taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
- info->reg_id_mmfr1, boot->reg_id_mmfr1);
- taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
- info->reg_id_mmfr2, boot->reg_id_mmfr2);
- taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
- info->reg_id_mmfr3, boot->reg_id_mmfr3);
- taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
- info->reg_id_pfr0, boot->reg_id_pfr0);
- taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
- info->reg_id_pfr1, boot->reg_id_pfr1);
- taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
- info->reg_mvfr0, boot->reg_mvfr0);
- taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
- info->reg_mvfr1, boot->reg_mvfr1);
- taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
- info->reg_mvfr2, boot->reg_mvfr2);
- }
-
if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
info->reg_zcr, boot->reg_zcr);
/* Probe vector lengths, unless we already gave up on SVE */
if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
- !sys_caps_initialised)
+ !system_capabilities_finalized())
sve_update_vq_map();
}
/*
+ * This relies on a sanitised view of the AArch64 ID registers
+ * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last.
+ */
+ taint |= update_32bit_cpu_features(cpu, info, boot);
+
+ /*
* Mismatched CPU features are a recipe for disaster. Don't even
* pretend to support them.
*/
@@ -796,10 +1125,11 @@
{
struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);
- /* We shouldn't get a request for an unsupported register */
- BUG_ON(!regp);
+ if (!regp)
+ return 0;
return regp->sys_val;
}
+EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg);
#define read_sysreg_case(r) \
case r: return read_sysreg_s(r)
@@ -813,17 +1143,22 @@
switch (sys_id) {
read_sysreg_case(SYS_ID_PFR0_EL1);
read_sysreg_case(SYS_ID_PFR1_EL1);
+ read_sysreg_case(SYS_ID_PFR2_EL1);
read_sysreg_case(SYS_ID_DFR0_EL1);
+ read_sysreg_case(SYS_ID_DFR1_EL1);
read_sysreg_case(SYS_ID_MMFR0_EL1);
read_sysreg_case(SYS_ID_MMFR1_EL1);
read_sysreg_case(SYS_ID_MMFR2_EL1);
read_sysreg_case(SYS_ID_MMFR3_EL1);
+ read_sysreg_case(SYS_ID_MMFR4_EL1);
+ read_sysreg_case(SYS_ID_MMFR5_EL1);
read_sysreg_case(SYS_ID_ISAR0_EL1);
read_sysreg_case(SYS_ID_ISAR1_EL1);
read_sysreg_case(SYS_ID_ISAR2_EL1);
read_sysreg_case(SYS_ID_ISAR3_EL1);
read_sysreg_case(SYS_ID_ISAR4_EL1);
read_sysreg_case(SYS_ID_ISAR5_EL1);
+ read_sysreg_case(SYS_ID_ISAR6_EL1);
read_sysreg_case(SYS_MVFR0_EL1);
read_sysreg_case(SYS_MVFR1_EL1);
read_sysreg_case(SYS_MVFR2_EL1);
@@ -838,6 +1173,7 @@
read_sysreg_case(SYS_ID_AA64MMFR2_EL1);
read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
+ read_sysreg_case(SYS_ID_AA64ISAR2_EL1);
read_sysreg_case(SYS_CNTFRQ_EL0);
read_sysreg_case(SYS_CTR_EL0);
@@ -958,6 +1294,46 @@
return has_cpuid_feature(entry, scope);
}
+/*
+ * This check is triggered during early boot, before the cpufeature
+ * code is initialised. Checking the status on the local CPU allows the
+ * boot CPU to detect the need for non-global mappings and thus avoid a
+ * pagetable re-write after all the CPUs are booted. The check is run
+ * again on each individual CPU anyway, giving us a consistent state
+ * once the SMP CPUs are up, at which point the switch to non-global
+ * mappings is made if required.
+ */
+bool kaslr_requires_kpti(void)
+{
+ if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+ return false;
+
+ /*
+ * E0PD does a similar job to KPTI so can be used instead
+ * where available.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
+ u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+ if (cpuid_feature_extract_unsigned_field(mmfr2,
+ ID_AA64MMFR2_E0PD_SHIFT))
+ return false;
+ }
+
+ /*
+	 * Systems affected by Cavium erratum 27456 are incompatible
+ * with KPTI.
+ */
+ if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
+ extern const struct midr_range cavium_erratum_27456_cpus[];
+
+ if (is_midr_in_range_list(read_cpuid_id(),
+ cavium_erratum_27456_cpus))
+ return false;
+ }
+
+ return kaslr_offset() > 0;
+}
+
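
The E0PD probe above needs nothing more than a plain unsigned field extraction from ID_AA64MMFR2_EL1. In essence (extract_unsigned_field is an illustrative stand-in for the in-tree cpuid_feature_extract_unsigned_field() helper; ID register fields default to 4 bits):

	static inline unsigned int extract_unsigned_field(u64 reg, unsigned int shift)
	{
		/* shift the field down, keep its low 4 bits */
		return (reg >> shift) & 0xf;
	}
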
static bool __meltdown_safe = true;
static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
@@ -968,6 +1344,7 @@
static const struct midr_range kpti_safe_list[] = {
MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
@@ -975,6 +1352,11 @@
MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+ MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_GOLD),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
{ /* sentinel */ }
};
char const *str = "kpti command line option";
@@ -1000,7 +1382,7 @@
}
/* Useful for KASLR robustness */
- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0) {
+ if (kaslr_requires_kpti()) {
if (!__kpti_forced) {
str = "KASLR";
__kpti_forced = 1;
@@ -1035,15 +1417,20 @@
extern kpti_remap_fn idmap_kpti_install_ng_mappings;
kpti_remap_fn *remap_fn;
- static bool kpti_applied = false;
int cpu = smp_processor_id();
+ if (__this_cpu_read(this_cpu_vector) == vectors) {
+ const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI);
+
+ __this_cpu_write(this_cpu_vector, v);
+ }
+
/*
* We don't need to rewrite the page-tables if either we've done
* it already or we have KASLR enabled and therefore have not
* created any global mappings at all.
*/
- if (kpti_applied || kaslr_offset() > 0)
+ if (arm64_use_ng_mappings)
return;
remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
@@ -1053,7 +1440,7 @@
cpu_uninstall_idmap();
if (!cpu)
- kpti_applied = true;
+ arm64_use_ng_mappings = true;
return;
}
@@ -1084,6 +1471,7 @@
write_sysreg(tcr, tcr_el1);
isb();
+ local_flush_tlb_all();
}
static bool cpu_has_broken_dbm(void)
@@ -1092,6 +1480,8 @@
static const struct midr_range cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_1024718
MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ /* Kryo4xx Silver (rdpe => r1p0) */
+ MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe),
#endif
{},
};
@@ -1142,6 +1532,57 @@
#endif
+#ifdef CONFIG_ARM64_AMU_EXTN
+
+/*
+ * The "amu_cpus" cpumask only signals that the CPU implementation for the
+ * flagged CPUs supports the Activity Monitors Unit (AMU) but does not provide
+ * information regarding all the events that it supports. When a CPU bit is
+ * set in the cpumask, the user of this feature can only rely on the presence
+ * of the 4 fixed counters for that CPU. It does not guarantee that the
+ * counters are enabled, or that access to them has been granted, by code
+ * executed at higher exception levels (firmware).
+ */
+static struct cpumask amu_cpus __read_mostly;
+
+bool cpu_has_amu_feat(int cpu)
+{
+ return cpumask_test_cpu(cpu, &amu_cpus);
+}
+
+/* Initialize the use of AMU counters for frequency invariance */
+extern void init_cpu_freq_invariance_counters(void);
+
+static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
+{
+ if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) {
+ pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
+ smp_processor_id());
+ cpumask_set_cpu(smp_processor_id(), &amu_cpus);
+ init_cpu_freq_invariance_counters();
+ }
+}
+
+static bool has_amu(const struct arm64_cpu_capabilities *cap,
+ int __unused)
+{
+ /*
+ * The AMU extension is a non-conflicting feature: the kernel can
+ * safely run a mix of CPUs with and without support for the
+ * activity monitors extension. Therefore, unconditionally enable
+ * the capability to allow any late CPU to use the feature.
+ *
+ * With this feature unconditionally enabled, the cpu_enable
+ * function will be called for all CPUs that match the criteria,
+	 * including secondary and hotplugged CPUs, marking this feature as
+	 * present on each matching CPU. The enable function will also
+ * print a detection message.
+ */
+
+ return true;
+}
+#endif
+
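
A usage sketch for the cpumask contract described above; note that a set bit only promises that the four fixed counters exist, so a hypothetical consumer still has to validate enable/access state separately:

	static bool amu_counters_usable(int cpu)
	{
		if (!cpu_has_amu_feat(cpu))
			return false;
		/* counters exist; firmware enable/access state not guaranteed */
		return true;
	}
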
#ifdef CONFIG_ARM64_VHE
static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
{
@@ -1171,48 +1612,6 @@
WARN_ON(val & (7 << 27 | 7 << 21));
}
-#ifdef CONFIG_ARM64_SSBD
-static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr)
-{
- if (user_mode(regs))
- return 1;
-
- if (instr & BIT(PSTATE_Imm_shift))
- regs->pstate |= PSR_SSBS_BIT;
- else
- regs->pstate &= ~PSR_SSBS_BIT;
-
- arm64_skip_faulting_instruction(regs, 4);
- return 0;
-}
-
-static struct undef_hook ssbs_emulation_hook = {
- .instr_mask = ~(1U << PSTATE_Imm_shift),
- .instr_val = 0xd500401f | PSTATE_SSBS,
- .fn = ssbs_emulation_handler,
-};
-
-static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused)
-{
- static bool undef_hook_registered = false;
- static DEFINE_RAW_SPINLOCK(hook_lock);
-
- raw_spin_lock(&hook_lock);
- if (!undef_hook_registered) {
- register_undef_hook(&ssbs_emulation_hook);
- undef_hook_registered = true;
- }
- raw_spin_unlock(&hook_lock);
-
- if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
- sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
- arm64_set_ssbd_mitigation(false);
- } else {
- arm64_set_ssbd_mitigation(true);
- }
-}
-#endif /* CONFIG_ARM64_SSBD */
-
#ifdef CONFIG_ARM64_PAN
static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
{
@@ -1236,13 +1635,55 @@
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_PTR_AUTH
-static void cpu_enable_address_auth(struct arm64_cpu_capabilities const *cap)
+static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, int scope)
{
- sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |
- SCTLR_ELx_ENDA | SCTLR_ELx_ENDB);
+ int boot_val, sec_val;
+
+ /* We don't expect to be called with SCOPE_SYSTEM */
+ WARN_ON(scope == SCOPE_SYSTEM);
+ /*
+	 * The ptr-auth feature levels are not compatible with lower levels.
+	 * Hence we must match the ptr-auth feature level of the secondary
+	 * CPUs with that of the boot CPU. The level of the boot CPU is
+	 * fetched from the sanitised register, whereas a direct register
+	 * read is done for the secondary CPUs.
+	 * The sanitised feature state is guaranteed to match that of the
+	 * boot CPU because a mismatched secondary CPU is parked before it
+	 * gets a chance to update the state with the capability.
+ */
+ boot_val = cpuid_feature_extract_field(read_sanitised_ftr_reg(entry->sys_reg),
+ entry->field_pos, entry->sign);
+ if (scope & SCOPE_BOOT_CPU)
+ return boot_val >= entry->min_field_value;
+ /* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */
+ sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg),
+ entry->field_pos, entry->sign);
+ return sec_val == boot_val;
+}
+
+static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) ||
+ has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
+}
+
+static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
+ int __unused)
+{
+ return __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH) ||
+ __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF);
}
#endif /* CONFIG_ARM64_PTR_AUTH */
+#ifdef CONFIG_ARM64_E0PD
+static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
+{
+ if (this_cpu_has_cap(ARM64_HAS_E0PD))
+ sysreg_clear_set(tcr_el1, 0, TCR_E0PD1);
+}
+#endif /* CONFIG_ARM64_E0PD */
+
#ifdef CONFIG_ARM64_PSEUDO_NMI
static bool enable_pseudo_nmi;
@@ -1259,6 +1700,52 @@
}
#endif
+#ifdef CONFIG_ARM64_BTI
+static void bti_enable(const struct arm64_cpu_capabilities *__unused)
+{
+ /*
+ * Use of X16/X17 for tail-calls and trampolines that jump to
+ * function entry points using BR is a requirement for
+ * marking binaries with GNU_PROPERTY_AARCH64_FEATURE_1_BTI.
+ * So, be strict and forbid other BRs using other registers to
+ * jump onto a PACIxSP instruction:
+ */
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_BT0 | SCTLR_EL1_BT1);
+ isb();
+}
+#endif /* CONFIG_ARM64_BTI */
+
+#ifdef CONFIG_ARM64_MTE
+static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
+{
+ /*
+ * Clear the tags in the zero page. This needs to be done via the
+ * linear map which has the Tagged attribute.
+ */
+ if (!test_and_set_bit(PG_mte_tagged, &ZERO_PAGE(0)->flags))
+ mte_clear_page_tags(lm_alias(empty_zero_page));
+}
+#endif /* CONFIG_ARM64_MTE */
+
+/* Internal helper functions to match cpu capability type */
+static bool
+cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
+{
+ return !!(cap->type & ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU);
+}
+
+static bool
+cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap)
+{
+ return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU);
+}
+
+static bool
+cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap)
+{
+ return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT);
+}
+
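
Most entries in the table below are matched via the generic has_cpuid_feature() path, which boils down to reading the named ID register, extracting the (possibly signed) field and comparing it against the entry's minimum. A simplified sketch of that test, assuming the register value has already been read for the right scope (cap_matches is a hypothetical name):

	static bool cap_matches(const struct arm64_cpu_capabilities *cap, u64 reg)
	{
		s64 val = cpuid_feature_extract_field(reg, cap->field_pos, cap->sign);

		return val >= cap->min_field_value;	/* "at least this feature level" */
	}
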
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@@ -1283,7 +1770,7 @@
.cpu_enable = cpu_enable_pan,
},
#endif /* CONFIG_ARM64_PAN */
-#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
+#ifdef CONFIG_ARM64_LSE_ATOMICS
{
.desc = "LSE atomic instructions",
.capability = ARM64_HAS_LSE_ATOMICS,
@@ -1294,7 +1781,7 @@
.sign = FTR_UNSIGNED,
.min_field_value = 2,
},
-#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+#endif /* CONFIG_ARM64_LSE_ATOMICS */
{
.desc = "Software prefetching using PRFM",
.capability = ARM64_HAS_NO_HW_PREFETCH,
@@ -1342,6 +1829,18 @@
.field_pos = ID_AA64PFR0_EL0_SHIFT,
.min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
},
+#ifdef CONFIG_KVM
+ {
+ .desc = "32-bit EL1 Support",
+ .capability = ARM64_HAS_32BIT_EL1,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64PFR0_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64PFR0_EL1_SHIFT,
+ .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT,
+ },
+#endif
{
.desc = "Kernel page table isolation (KPTI)",
.capability = ARM64_UNMAP_KERNEL_AT_EL0,
@@ -1411,6 +1910,24 @@
.cpu_enable = cpu_clear_disr,
},
#endif /* CONFIG_ARM64_RAS_EXTN */
+#ifdef CONFIG_ARM64_AMU_EXTN
+ {
+ /*
+ * The feature is enabled by default if CONFIG_ARM64_AMU_EXTN=y.
+ * Therefore, don't provide .desc as we don't want the detection
+ * message to be shown until at least one CPU is detected to
+ * support the feature.
+ */
+ .capability = ARM64_HAS_AMU_EXTN,
+ .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
+ .matches = has_amu,
+ .sys_reg = SYS_ID_AA64PFR0_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64PFR0_AMU_SHIFT,
+ .min_field_value = ID_AA64PFR0_AMU,
+ .cpu_enable = cpu_amu_enable,
+ },
+#endif /* CONFIG_ARM64_AMU_EXTN */
{
.desc = "Data cache clean to the PoU not required for I/D coherence",
.capability = ARM64_HAS_CACHE_IDC,
@@ -1435,6 +1952,26 @@
.matches = has_cpuid_feature,
.cpu_enable = cpu_has_fwb,
},
+ {
+ .desc = "ARMv8.4 Translation Table Level",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_ARMv8_4_TTL,
+ .sys_reg = SYS_ID_AA64MMFR2_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64MMFR2_TTL_SHIFT,
+ .min_field_value = 1,
+ .matches = has_cpuid_feature,
+ },
+ {
+ .desc = "TLB range maintenance instructions",
+ .capability = ARM64_HAS_TLB_RANGE,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64ISAR0_EL1,
+ .field_pos = ID_AA64ISAR0_TLB_SHIFT,
+ .sign = FTR_UNSIGNED,
+ .min_field_value = ID_AA64ISAR0_TLB_RANGE,
+ },
#ifdef CONFIG_ARM64_HW_AFDBM
{
/*
@@ -1464,19 +2001,16 @@
.field_pos = ID_AA64ISAR0_CRC32_SHIFT,
.min_field_value = 1,
},
-#ifdef CONFIG_ARM64_SSBD
{
.desc = "Speculative Store Bypassing Safe (SSBS)",
.capability = ARM64_SSBS,
- .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64PFR1_EL1,
.field_pos = ID_AA64PFR1_SSBS_SHIFT,
.sign = FTR_UNSIGNED,
.min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY,
- .cpu_enable = cpu_enable_ssbs,
},
-#endif
#ifdef CONFIG_ARM64_CNP
{
.desc = "Common not Private translations",
@@ -1504,24 +2038,27 @@
{
.desc = "Address authentication (architected algorithm)",
.capability = ARM64_HAS_ADDRESS_AUTH_ARCH,
- .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64ISAR1_APA_SHIFT,
.min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
- .matches = has_cpuid_feature,
- .cpu_enable = cpu_enable_address_auth,
+ .matches = has_address_auth_cpucap,
},
{
.desc = "Address authentication (IMP DEF algorithm)",
.capability = ARM64_HAS_ADDRESS_AUTH_IMP_DEF,
- .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64ISAR1_API_SHIFT,
.min_field_value = ID_AA64ISAR1_API_IMP_DEF,
- .matches = has_cpuid_feature,
- .cpu_enable = cpu_enable_address_auth,
+ .matches = has_address_auth_cpucap,
+ },
+ {
+ .capability = ARM64_HAS_ADDRESS_AUTH,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_address_auth_metacap,
},
{
.desc = "Generic authentication (architected algorithm)",
@@ -1543,6 +2080,11 @@
.min_field_value = ID_AA64ISAR1_GPI_IMP_DEF,
.matches = has_cpuid_feature,
},
+ {
+ .capability = ARM64_HAS_GENERIC_AUTH,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_generic_auth,
+ },
#endif /* CONFIG_ARM64_PTR_AUTH */
#ifdef CONFIG_ARM64_PSEUDO_NMI
{
@@ -1559,6 +2101,61 @@
.min_field_value = 1,
},
#endif
+#ifdef CONFIG_ARM64_E0PD
+ {
+ .desc = "E0PD",
+ .capability = ARM64_HAS_E0PD,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .sys_reg = SYS_ID_AA64MMFR2_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64MMFR2_E0PD_SHIFT,
+ .matches = has_cpuid_feature,
+ .min_field_value = 1,
+ .cpu_enable = cpu_enable_e0pd,
+ },
+#endif
+#ifdef CONFIG_ARCH_RANDOM
+ {
+ .desc = "Random Number Generator",
+ .capability = ARM64_HAS_RNG,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64ISAR0_EL1,
+ .field_pos = ID_AA64ISAR0_RNDR_SHIFT,
+ .sign = FTR_UNSIGNED,
+ .min_field_value = 1,
+ },
+#endif
+#ifdef CONFIG_ARM64_BTI
+ {
+ .desc = "Branch Target Identification",
+ .capability = ARM64_BTI,
+#ifdef CONFIG_ARM64_BTI_KERNEL
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+#else
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+#endif
+ .matches = has_cpuid_feature,
+ .cpu_enable = bti_enable,
+ .sys_reg = SYS_ID_AA64PFR1_EL1,
+ .field_pos = ID_AA64PFR1_BT_SHIFT,
+ .min_field_value = ID_AA64PFR1_BT_BTI,
+ .sign = FTR_UNSIGNED,
+ },
+#endif
+#ifdef CONFIG_ARM64_MTE
+ {
+ .desc = "Memory Tagging Extension",
+ .capability = ARM64_MTE,
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64PFR1_EL1,
+ .field_pos = ID_AA64PFR1_MTE_SHIFT,
+ .min_field_value = ID_AA64PFR1_MTE,
+ .sign = FTR_UNSIGNED,
+ .cpu_enable = cpu_enable_mte,
+ },
+#endif /* CONFIG_ARM64_MTE */
{},
};
@@ -1636,6 +2233,7 @@
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
@@ -1649,6 +2247,9 @@
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
@@ -1656,14 +2257,27 @@
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
#endif
HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
+#ifdef CONFIG_ARM64_BTI
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI),
+#endif
#ifdef CONFIG_ARM64_PTR_AUTH
HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
#endif
+#ifdef CONFIG_ARM64_MTE
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
+#endif /* CONFIG_ARM64_MTE */
+ HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
+ HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
+ HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
{},
};
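
Each HWCAP_CAP() entry maps one ID register threshold onto a userspace-visible capability: for example, the RNDR line above advertises KERNEL_HWCAP_RNG (the "rng" string in /proc/cpuinfo) once ID_AA64ISAR0_EL1.RNDR is at least 1.
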
@@ -1859,10 +2473,8 @@
* Run through the list of capabilities to check for conflicts.
* If the system has already detected a capability, take necessary
* action on this CPU.
- *
- * Returns "false" on conflicts.
*/
-static bool verify_local_cpu_caps(u16 scope_mask)
+static void verify_local_cpu_caps(u16 scope_mask)
{
int i;
bool cpu_has_cap, system_has_cap;
@@ -1907,10 +2519,12 @@
pr_crit("CPU%d: Detected conflict for capability %d (%s), System: %d, CPU: %d\n",
smp_processor_id(), caps->capability,
caps->desc, system_has_cap, cpu_has_cap);
- return false;
- }
- return true;
+ if (cpucap_panic_on_conflict(caps))
+ cpu_panic_kernel();
+ else
+ cpu_die_early();
+ }
}
/*
@@ -1920,12 +2534,8 @@
static void check_early_cpu_features(void)
{
verify_cpu_asid_bits();
- /*
- * Early features are used by the kernel already. If there
- * is a conflict, we cannot proceed further.
- */
- if (!verify_local_cpu_caps(SCOPE_BOOT_CPU))
- cpu_panic_kernel();
+
+ verify_local_cpu_caps(SCOPE_BOOT_CPU);
}
static void
@@ -1957,6 +2567,36 @@
/* Add checks on other ZCR bits here if necessary */
}
+static void verify_hyp_capabilities(void)
+{
+ u64 safe_mmfr1, mmfr0, mmfr1;
+ int parange, ipa_max;
+ unsigned int safe_vmid_bits, vmid_bits;
+
+ if (!IS_ENABLED(CONFIG_KVM))
+ return;
+
+ safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+ mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+
+ /* Verify VMID bits */
+ safe_vmid_bits = get_vmid_bits(safe_mmfr1);
+ vmid_bits = get_vmid_bits(mmfr1);
+ if (vmid_bits < safe_vmid_bits) {
+ pr_crit("CPU%d: VMID width mismatch\n", smp_processor_id());
+ cpu_die_early();
+ }
+
+ /* Verify IPA range */
+ parange = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_PARANGE_SHIFT);
+ ipa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
+ if (ipa_max < get_kvm_ipa_limit()) {
+ pr_crit("CPU%d: IPA range mismatch\n", smp_processor_id());
+ cpu_die_early();
+ }
+}
/*
 * Run through the enabled system capabilities and enable() each of them on this CPU.
@@ -1973,8 +2613,7 @@
* check_early_cpu_features(), as they need to be verified
* on all secondary CPUs.
*/
- if (!verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU))
- cpu_die_early();
+ verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU);
verify_local_elf_hwcaps(arm64_elf_hwcaps);
@@ -1983,6 +2622,9 @@
if (system_supports_sve())
verify_sve_features();
+
+ if (is_hyp_mode_available())
+ verify_hyp_capabilities();
}
void check_local_cpu_capabilities(void)
@@ -1999,7 +2641,7 @@
* Otherwise, this CPU should verify that it has all the system
* advertised capabilities.
*/
- if (!sys_caps_initialised)
+ if (!system_capabilities_finalized())
update_cpu_capabilities(SCOPE_LOCAL_CPU);
else
verify_local_cpu_capabilities();
@@ -2013,14 +2655,6 @@
enable_cpu_capabilities(SCOPE_BOOT_CPU);
}
-DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
-EXPORT_SYMBOL(arm64_const_caps_ready);
-
-static void __init mark_const_caps_ready(void)
-{
- static_branch_enable(&arm64_const_caps_ready);
-}
-
bool this_cpu_has_cap(unsigned int n)
{
if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) {
@@ -2033,6 +2667,23 @@
return false;
}
+/*
+ * This helper function is used in a narrow window when:
+ * - the system-wide safe registers have been set up with all the SMP CPUs, and
+ * - the SYSTEM_FEATURE cpu_hwcaps may not yet have been set.
+ * In all other cases cpus_have_{const_}cap() should be used.
+ */
+static bool __system_matches_cap(unsigned int n)
+{
+ if (n < ARM64_NCAPS) {
+ const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n];
+
+ if (cap)
+ return cap->matches(cap, SCOPE_SYSTEM);
+ }
+ return false;
+}
+
void cpu_set_feature(unsigned int num)
{
WARN_ON(num >= MAX_CPU_FEATURES);
@@ -2079,7 +2730,6 @@
u32 cwg;
setup_system_capabilities();
- mark_const_caps_ready();
setup_elf_hwcaps(arm64_elf_hwcaps);
if (system_supports_32bit_el0())
@@ -2092,7 +2742,7 @@
minsigstksz_setup();
/* Advertise that we have computed the system capabilities */
- set_sys_caps_initialised();
+ finalize_system_capabilities();
/*
* Check for sane CTR_EL0.CWG value.
@@ -2106,7 +2756,7 @@
static bool __maybe_unused
cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
{
- return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO));
+ return (__system_matches_cap(ARM64_HAS_PAN) && !__system_matches_cap(ARM64_HAS_UAO));
}
static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
@@ -2163,7 +2813,7 @@
if (sys_reg_CRm(id) == 0)
return emulate_id_reg(id, valp);
- regp = get_arm64_ftr_reg(id);
+ regp = get_arm64_ftr_reg_nowarn(id);
if (regp)
*valp = arm64_ftr_reg_user_value(regp);
else
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index e4d6af2..b512b55 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -18,11 +18,11 @@
int arm_cpuidle_init(unsigned int cpu)
{
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
int ret = -EOPNOTSUPP;
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_suspend &&
- cpu_ops[cpu]->cpu_init_idle)
- ret = cpu_ops[cpu]->cpu_init_idle(cpu);
+ if (ops && ops->cpu_suspend && ops->cpu_init_idle)
+ ret = ops->cpu_init_idle(cpu);
return ret;
}
@@ -37,8 +37,9 @@
int arm_cpuidle_suspend(int index)
{
int cpu = smp_processor_id();
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
- return cpu_ops[cpu]->cpu_suspend(index);
+ return ops->cpu_suspend(index);
}
#ifdef CONFIG_ACPI
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 05933c0..4c0e727 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -34,93 +34,105 @@
static struct cpuinfo_arm64 boot_cpu_data;
static const char *icache_policy_str[] = {
- [0 ... ICACHE_POLICY_PIPT] = "RESERVED/UNKNOWN",
+ [ICACHE_POLICY_VPIPT] = "VPIPT",
+ [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN",
[ICACHE_POLICY_VIPT] = "VIPT",
[ICACHE_POLICY_PIPT] = "PIPT",
- [ICACHE_POLICY_VPIPT] = "VPIPT",
};
unsigned long __icache_flags;
static const char *const hwcap_str[] = {
- "fp",
- "asimd",
- "evtstrm",
- "aes",
- "pmull",
- "sha1",
- "sha2",
- "crc32",
- "atomics",
- "fphp",
- "asimdhp",
- "cpuid",
- "asimdrdm",
- "jscvt",
- "fcma",
- "lrcpc",
- "dcpop",
- "sha3",
- "sm3",
- "sm4",
- "asimddp",
- "sha512",
- "sve",
- "asimdfhm",
- "dit",
- "uscat",
- "ilrcpc",
- "flagm",
- "ssbs",
- "sb",
- "paca",
- "pacg",
- "dcpodp",
- "sve2",
- "sveaes",
- "svepmull",
- "svebitperm",
- "svesha3",
- "svesm4",
- "flagm2",
- "frint",
- NULL
+ [KERNEL_HWCAP_FP] = "fp",
+ [KERNEL_HWCAP_ASIMD] = "asimd",
+ [KERNEL_HWCAP_EVTSTRM] = "evtstrm",
+ [KERNEL_HWCAP_AES] = "aes",
+ [KERNEL_HWCAP_PMULL] = "pmull",
+ [KERNEL_HWCAP_SHA1] = "sha1",
+ [KERNEL_HWCAP_SHA2] = "sha2",
+ [KERNEL_HWCAP_CRC32] = "crc32",
+ [KERNEL_HWCAP_ATOMICS] = "atomics",
+ [KERNEL_HWCAP_FPHP] = "fphp",
+ [KERNEL_HWCAP_ASIMDHP] = "asimdhp",
+ [KERNEL_HWCAP_CPUID] = "cpuid",
+ [KERNEL_HWCAP_ASIMDRDM] = "asimdrdm",
+ [KERNEL_HWCAP_JSCVT] = "jscvt",
+ [KERNEL_HWCAP_FCMA] = "fcma",
+ [KERNEL_HWCAP_LRCPC] = "lrcpc",
+ [KERNEL_HWCAP_DCPOP] = "dcpop",
+ [KERNEL_HWCAP_SHA3] = "sha3",
+ [KERNEL_HWCAP_SM3] = "sm3",
+ [KERNEL_HWCAP_SM4] = "sm4",
+ [KERNEL_HWCAP_ASIMDDP] = "asimddp",
+ [KERNEL_HWCAP_SHA512] = "sha512",
+ [KERNEL_HWCAP_SVE] = "sve",
+ [KERNEL_HWCAP_ASIMDFHM] = "asimdfhm",
+ [KERNEL_HWCAP_DIT] = "dit",
+ [KERNEL_HWCAP_USCAT] = "uscat",
+ [KERNEL_HWCAP_ILRCPC] = "ilrcpc",
+ [KERNEL_HWCAP_FLAGM] = "flagm",
+ [KERNEL_HWCAP_SSBS] = "ssbs",
+ [KERNEL_HWCAP_SB] = "sb",
+ [KERNEL_HWCAP_PACA] = "paca",
+ [KERNEL_HWCAP_PACG] = "pacg",
+ [KERNEL_HWCAP_DCPODP] = "dcpodp",
+ [KERNEL_HWCAP_SVE2] = "sve2",
+ [KERNEL_HWCAP_SVEAES] = "sveaes",
+ [KERNEL_HWCAP_SVEPMULL] = "svepmull",
+ [KERNEL_HWCAP_SVEBITPERM] = "svebitperm",
+ [KERNEL_HWCAP_SVESHA3] = "svesha3",
+ [KERNEL_HWCAP_SVESM4] = "svesm4",
+ [KERNEL_HWCAP_FLAGM2] = "flagm2",
+ [KERNEL_HWCAP_FRINT] = "frint",
+ [KERNEL_HWCAP_SVEI8MM] = "svei8mm",
+ [KERNEL_HWCAP_SVEF32MM] = "svef32mm",
+ [KERNEL_HWCAP_SVEF64MM] = "svef64mm",
+ [KERNEL_HWCAP_SVEBF16] = "svebf16",
+ [KERNEL_HWCAP_I8MM] = "i8mm",
+ [KERNEL_HWCAP_BF16] = "bf16",
+ [KERNEL_HWCAP_DGH] = "dgh",
+ [KERNEL_HWCAP_RNG] = "rng",
+ [KERNEL_HWCAP_BTI] = "bti",
+ [KERNEL_HWCAP_MTE] = "mte",
+ [KERNEL_HWCAP_ECV] = "ecv",
+ [KERNEL_HWCAP_AFP] = "afp",
+ [KERNEL_HWCAP_RPRES] = "rpres",
};
#ifdef CONFIG_COMPAT
+#define COMPAT_KERNEL_HWCAP(x) const_ilog2(COMPAT_HWCAP_ ## x)
static const char *const compat_hwcap_str[] = {
- "swp",
- "half",
- "thumb",
- "26bit",
- "fastmult",
- "fpa",
- "vfp",
- "edsp",
- "java",
- "iwmmxt",
- "crunch",
- "thumbee",
- "neon",
- "vfpv3",
- "vfpv3d16",
- "tls",
- "vfpv4",
- "idiva",
- "idivt",
- "vfpd32",
- "lpae",
- "evtstrm",
- NULL
+ [COMPAT_KERNEL_HWCAP(SWP)] = "swp",
+ [COMPAT_KERNEL_HWCAP(HALF)] = "half",
+ [COMPAT_KERNEL_HWCAP(THUMB)] = "thumb",
+ [COMPAT_KERNEL_HWCAP(26BIT)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(FAST_MULT)] = "fastmult",
+ [COMPAT_KERNEL_HWCAP(FPA)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(VFP)] = "vfp",
+ [COMPAT_KERNEL_HWCAP(EDSP)] = "edsp",
+ [COMPAT_KERNEL_HWCAP(JAVA)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(IWMMXT)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(CRUNCH)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(THUMBEE)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(NEON)] = "neon",
+ [COMPAT_KERNEL_HWCAP(VFPv3)] = "vfpv3",
+ [COMPAT_KERNEL_HWCAP(VFPV3D16)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(TLS)] = "tls",
+ [COMPAT_KERNEL_HWCAP(VFPv4)] = "vfpv4",
+ [COMPAT_KERNEL_HWCAP(IDIVA)] = "idiva",
+ [COMPAT_KERNEL_HWCAP(IDIVT)] = "idivt",
+ [COMPAT_KERNEL_HWCAP(VFPD32)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(LPAE)] = "lpae",
+ [COMPAT_KERNEL_HWCAP(EVTSTRM)] = "evtstrm",
};
+#define COMPAT_KERNEL_HWCAP2(x) const_ilog2(COMPAT_HWCAP2_ ## x)
static const char *const compat_hwcap2_str[] = {
- "aes",
- "pmull",
- "sha1",
- "sha2",
- "crc32",
- NULL
+ [COMPAT_KERNEL_HWCAP2(AES)] = "aes",
+ [COMPAT_KERNEL_HWCAP2(PMULL)] = "pmull",
+ [COMPAT_KERNEL_HWCAP2(SHA1)] = "sha1",
+ [COMPAT_KERNEL_HWCAP2(SHA2)] = "sha2",
+ [COMPAT_KERNEL_HWCAP2(CRC32)] = "crc32",
};
#endif /* CONFIG_COMPAT */
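
The const_ilog2() wrappers turn each single-bit hwcap mask into its array index, so every string sits at exactly the bit position that the /proc/cpuinfo loop tests. A worked example, assuming COMPAT_HWCAP_NEON is defined as (1 << 12) as in the arm32-compatible hwcap layout:

	COMPAT_KERNEL_HWCAP(NEON)	/* const_ilog2(1 << 12) == 12,
					   so compat_hwcap_str[12] == "neon" */
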
@@ -156,16 +168,25 @@
seq_puts(m, "Features\t:");
if (compat) {
#ifdef CONFIG_COMPAT
- for (j = 0; compat_hwcap_str[j]; j++)
- if (compat_elf_hwcap & (1 << j))
- seq_printf(m, " %s", compat_hwcap_str[j]);
+ for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) {
+ if (compat_elf_hwcap & (1 << j)) {
+ /*
+				 * Warn once if any feature that should not
+				 * be present on an arm64 platform is set.
+ */
+ if (WARN_ON_ONCE(!compat_hwcap_str[j]))
+ continue;
- for (j = 0; compat_hwcap2_str[j]; j++)
+ seq_printf(m, " %s", compat_hwcap_str[j]);
+ }
+ }
+
+ for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++)
if (compat_elf_hwcap2 & (1 << j))
seq_printf(m, " %s", compat_hwcap2_str[j]);
#endif /* CONFIG_COMPAT */
} else {
- for (j = 0; hwcap_str[j]; j++)
+ for (j = 0; j < ARRAY_SIZE(hwcap_str); j++)
if (cpu_have_feature(j))
seq_printf(m, " %s", hwcap_str[j]);
}
@@ -303,6 +324,8 @@
}
return 0;
}
+device_initcall(cpuinfo_regs_init);
+
static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
{
unsigned int cpu = smp_processor_id();
@@ -314,11 +337,11 @@
case ICACHE_POLICY_VPIPT:
set_bit(ICACHEF_VPIPT, &__icache_flags);
break;
- default:
- /* Fallthrough */
+ case ICACHE_POLICY_RESERVED:
case ICACHE_POLICY_VIPT:
/* Assume aliasing */
set_bit(ICACHEF_ALIASING, &__icache_flags);
+ break;
}
pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
@@ -329,7 +352,7 @@
info->reg_cntfrq = arch_timer_get_cntfrq();
/*
 * Use the effective value of the CTR_EL0 rather than the raw value
- * exposed by the CPU. CTR_E0.IDC field value must be interpreted
+ * exposed by the CPU. CTR_EL0.IDC field value must be interpreted
* with the CLIDR_EL1 fields to avoid triggering false warnings
* when there is a mismatch across the CPUs. Keep track of the
* effective value of the CTR_EL0 in our internal records for
@@ -344,6 +367,7 @@
info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1);
info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
+ info->reg_id_aa64isar2 = read_cpuid(ID_AA64ISAR2_EL1);
info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
@@ -354,18 +378,23 @@
/* Update the 32bit ID registers only if AArch32 is implemented */
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
+ info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1);
info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
+ info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1);
info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
+ info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1);
+ info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1);
info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+ info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1);
info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
@@ -394,5 +423,3 @@
boot_cpu_data = *info;
init_cpu_features(&boot_cpu_data);
}
-
-device_initcall(cpuinfo_regs_init);
diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c
index ca4c3e1..314391a 100644
--- a/arch/arm64/kernel/crash_core.c
+++ b/arch/arm64/kernel/crash_core.c
@@ -5,7 +5,16 @@
*/
#include <linux/crash_core.h>
+#include <asm/cpufeature.h>
#include <asm/memory.h>
+#include <asm/pgtable-hwdef.h>
+
+static inline u64 get_tcr_el1_t1sz(void)
+{
+ return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET;
+}
void arch_crash_save_vmcoreinfo(void)
{
@@ -15,5 +24,10 @@
kimage_voffset);
vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
PHYS_OFFSET);
+ vmcoreinfo_append_str("NUMBER(TCR_EL1_T1SZ)=0x%llx\n",
+ get_tcr_el1_t1sz());
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+ vmcoreinfo_append_str("NUMBER(KERNELPACMASK)=0x%llx\n",
+ system_supports_address_auth() ?
+ ptrauth_kernel_pac_mask() : 0);
}
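
A worked example of the T1SZ export: with 48-bit kernel VAs, TCR_EL1.T1SZ holds 64 - 48 = 16, so get_tcr_el1_t1sz() returns 16 and the note reads NUMBER(TCR_EL1_T1SZ)=0x10, enough for a dump tool such as makedumpfile to reconstruct the kernel page-table geometry.
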
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index d64a3c1..fa76151 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -130,7 +130,7 @@
return 0;
}
-static int debug_monitors_init(void)
+static int __init debug_monitors_init(void)
{
return cpuhp_setup_state(CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
"arm64/debug_monitors:starting",
@@ -379,15 +379,13 @@
}
NOKPROBE_SYMBOL(aarch32_break_handler);
-static int __init debug_traps_init(void)
+void __init debug_traps_init(void)
{
hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
TRAP_TRACE, "single-step handler");
hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
- TRAP_BRKPT, "ptrace BRK handler");
- return 0;
+ TRAP_BRKPT, "BRK handler");
}
-arch_initcall(debug_traps_init);
/* Re-enable single step for syscall restarting. */
void user_rewind_single_step(struct task_struct *task)
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 304d5b0..0073b24 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -10,81 +10,35 @@
#include <asm/assembler.h>
-#define EFI_LOAD_ERROR 0x8000000000000001
-
__INIT
+SYM_CODE_START(efi_enter_kernel)
/*
- * We arrive here from the EFI boot manager with:
- *
- * * CPU in little-endian mode
- * * MMU on with identity-mapped RAM
- * * Icache and Dcache on
- *
- * We will most likely be running from some place other than where
- * we want to be. The kernel image wants to be placed at TEXT_OFFSET
- * from start of RAM.
+ * efi_pe_entry() will have copied the kernel image if necessary and we
+ * end up here with the device tree address in x1 and the kernel entry
+ * point stored in x0. Save those values in registers which are
+ * callee preserved.
*/
-ENTRY(entry)
- /*
- * Create a stack frame to save FP/LR with extra space
- * for image_addr variable passed to efi_entry().
- */
- stp x29, x30, [sp, #-32]!
- mov x29, sp
+ ldr w2, =primary_entry_offset
+ add x19, x0, x2 // relocated Image entrypoint
+ mov x20, x1 // DTB address
/*
- * Call efi_entry to do the real work.
- * x0 and x1 are already set up by firmware. Current runtime
- * address of image is calculated and passed via *image_addr.
- *
- * unsigned long efi_entry(void *handle,
- * efi_system_table_t *sys_table,
- * unsigned long *image_addr) ;
- */
- adr_l x8, _text
- add x2, sp, 16
- str x8, [x2]
- bl efi_entry
- cmn x0, #1
- b.eq efi_load_fail
-
- /*
- * efi_entry() will have copied the kernel image if necessary and we
- * return here with device tree address in x0 and the kernel entry
- * point stored at *image_addr. Save those values in registers which
- * are callee preserved.
- */
- mov x20, x0 // DTB address
- ldr x0, [sp, #16] // relocated _text address
- ldr w21, =stext_offset
- add x21, x0, x21
-
- /*
- * Calculate size of the kernel Image (same for original and copy).
- */
- adr_l x1, _text
- adr_l x2, _edata
- sub x1, x2, x1
-
- /*
- * Flush the copied Image to the PoC, and ensure it is not shadowed by
+ * Clean the copied Image to the PoC, and ensure it is not shadowed by
* stale icache entries from before relocation.
*/
- bl __flush_dcache_area
+ ldr w1, =kernel_size
+ bl __clean_dcache_area_poc
ic ialluis
/*
- * Ensure that the rest of this function (in the original Image) is
- * visible when the caches are disabled. The I-cache can't have stale
- * entries for the VA range of the current image, so no maintenance is
- * necessary.
+ * Clean the remainder of this routine to the PoC
+ * so that we can safely disable the MMU and caches.
*/
- adr x0, entry
- adr x1, entry_end
- sub x1, x1, x0
- bl __flush_dcache_area
-
+ adr x0, 0f
+ ldr w1, 3f
+ bl __clean_dcache_area_poc
+0:
/* Turn off Dcache and MMU */
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
@@ -109,12 +63,6 @@
mov x1, xzr
mov x2, xzr
mov x3, xzr
- br x21
-
-efi_load_fail:
- mov x0, #EFI_LOAD_ERROR
- ldp x29, x30, [sp], #32
- ret
-
-entry_end:
-ENDPROC(entry)
+ br x19
+SYM_CODE_END(efi_enter_kernel)
+3: .long . - 0b
diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S
index a7cfacc..a71844f 100644
--- a/arch/arm64/kernel/efi-header.S
+++ b/arch/arm64/kernel/efi-header.S
@@ -27,17 +27,17 @@
.long __initdata_begin - efi_header_end // SizeOfCode
.long __pecoff_data_size // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
- .long __efistub_entry - _head // AddressOfEntryPoint
+ .long __efistub_efi_pe_entry - _head // AddressOfEntryPoint
.long efi_header_end - _head // BaseOfCode
extra_header_fields:
.quad 0 // ImageBase
- .long SZ_4K // SectionAlignment
+ .long SEGMENT_ALIGN // SectionAlignment
.long PECOFF_FILE_ALIGNMENT // FileAlignment
.short 0 // MajorOperatingSystemVersion
.short 0 // MinorOperatingSystemVersion
- .short 0 // MajorImageVersion
- .short 0 // MinorImageVersion
+ .short LINUX_EFISTUB_MAJOR_VERSION // MajorImageVersion
+ .short LINUX_EFISTUB_MINOR_VERSION // MinorImageVersion
.short 0 // MajorSubsystemVersion
.short 0 // MinorSubsystemVersion
.long 0 // Win32VersionValue
@@ -147,6 +147,6 @@
* correctly at this alignment, we must ensure that .text is
* placed at a 4k boundary in the Image to begin with.
*/
- .align 12
+ .balign SEGMENT_ALIGN
efi_header_end:
.endm
diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc7110..75691a2 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -5,7 +5,7 @@
#include <linux/linkage.h>
-ENTRY(__efi_rt_asm_wrapper)
+SYM_FUNC_START(__efi_rt_asm_wrapper)
stp x29, x30, [sp, #-32]!
mov x29, sp
@@ -34,5 +34,14 @@
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
-ENDPROC(__efi_rt_asm_wrapper)
+0:
+ /*
+ * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
+ * shadow stack pointer, which we need to restore before returning to
+ * potentially instrumented code. This is safe because the wrapper is
+ * called with preemption disabled and a separate shadow stack is used
+ * for interrupts.
+ */
+ mov x18, x2
+ b efi_handle_corrupted_x18 // tail call
+SYM_FUNC_END(__efi_rt_asm_wrapper)
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index d0cf596..fa02efb 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -54,7 +54,7 @@
}
/* we will fill this structure from the stub, so don't put it in .bss */
-struct screen_info screen_info __section(.data);
+struct screen_info screen_info __section(".data");
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
new file mode 100644
index 0000000..ec120ed
--- /dev/null
+++ b/arch/arm64/kernel/entry-common.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Exception handling code
+ *
+ * Copyright (C) 2019 ARM Ltd.
+ */
+
+#include <linux/context_tracking.h>
+#include <linux/ptrace.h>
+#include <linux/thread_info.h>
+
+#include <asm/cpufeature.h>
+#include <asm/daifflags.h>
+#include <asm/esr.h>
+#include <asm/exception.h>
+#include <asm/kprobes.h>
+#include <asm/mmu.h>
+#include <asm/sysreg.h>
+
+/*
+ * This is intended to match the logic in irqentry_enter(), handling the kernel
+ * mode transitions only.
+ */
+static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
+{
+ regs->exit_rcu = false;
+
+ if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ rcu_irq_enter();
+ trace_hardirqs_off_finish();
+
+ regs->exit_rcu = true;
+ return;
+ }
+
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ rcu_irq_enter_check_tick();
+ trace_hardirqs_off_finish();
+}
+
+/*
+ * This is intended to match the logic in irqentry_exit(), handling the kernel
+ * mode transitions only, and with preemption handled elsewhere.
+ */
+static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
+{
+ lockdep_assert_irqs_disabled();
+
+ if (interrupts_enabled(regs)) {
+ if (regs->exit_rcu) {
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ rcu_irq_exit();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+ return;
+ }
+
+ trace_hardirqs_on();
+ } else {
+ if (regs->exit_rcu)
+ rcu_irq_exit();
+ }
+}
+
+void noinstr arm64_enter_nmi(struct pt_regs *regs)
+{
+ regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+
+ __nmi_enter();
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ lockdep_hardirq_enter();
+ rcu_nmi_enter();
+
+ trace_hardirqs_off_finish();
+ ftrace_nmi_enter();
+}
+
+void noinstr arm64_exit_nmi(struct pt_regs *regs)
+{
+ bool restore = regs->lockdep_hardirqs;
+
+ ftrace_nmi_exit();
+ if (restore) {
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ }
+
+ rcu_nmi_exit();
+ lockdep_hardirq_exit();
+ if (restore)
+ lockdep_hardirqs_on(CALLER_ADDR0);
+ __nmi_exit();
+}
+
+asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
+ arm64_enter_nmi(regs);
+ else
+ enter_from_kernel_mode(regs);
+}
+
+asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
+ arm64_exit_nmi(regs);
+ else
+ exit_to_kernel_mode(regs);
+}
+
+static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ far = untagged_addr(far);
+ do_mem_abort(far, esr, regs);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
+}
+
+static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ do_sp_pc_abort(far, esr, regs);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
+}
+
+static void noinstr el1_undef(struct pt_regs *regs)
+{
+ enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ do_undefinstr(regs);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
+}
+
+static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ bad_mode(regs, 0, esr);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
+}
+
+static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
+{
+ regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ rcu_nmi_enter();
+
+ trace_hardirqs_off_finish();
+}
+
+static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
+{
+ bool restore = regs->lockdep_hardirqs;
+
+ if (restore) {
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ }
+
+ rcu_nmi_exit();
+ if (restore)
+ lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
+static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ arm64_enter_el1_dbg(regs);
+ do_debug_exception(far, esr, regs);
+ arm64_exit_el1_dbg(regs);
+}
+
+static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ do_ptrauth_fault(regs, esr);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
+}
+
+asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_DABT_CUR:
+ case ESR_ELx_EC_IABT_CUR:
+ el1_abort(regs, esr);
+ break;
+ /*
+ * We don't handle ESR_ELx_EC_SP_ALIGN, since we will have hit a
+ * recursive exception when trying to push the initial pt_regs.
+ */
+ case ESR_ELx_EC_PC_ALIGN:
+ el1_pc(regs, esr);
+ break;
+ case ESR_ELx_EC_SYS64:
+ case ESR_ELx_EC_UNKNOWN:
+ el1_undef(regs);
+ break;
+ case ESR_ELx_EC_BREAKPT_CUR:
+ case ESR_ELx_EC_SOFTSTP_CUR:
+ case ESR_ELx_EC_WATCHPT_CUR:
+ case ESR_ELx_EC_BRK64:
+ el1_dbg(regs, esr);
+ break;
+ case ESR_ELx_EC_FPAC:
+ el1_fpac(regs, esr);
+ break;
+ default:
+ el1_inv(regs, esr);
+ }
+}
+
+asmlinkage void noinstr enter_from_user_mode(void)
+{
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ CT_WARN_ON(ct_state() != CONTEXT_USER);
+ user_exit_irqoff();
+ trace_hardirqs_off_finish();
+}
+
+asmlinkage void noinstr exit_to_user_mode(void)
+{
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ user_enter_irqoff();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
+static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ far = untagged_addr(far);
+ do_mem_abort(far, esr, regs);
+}
+
+static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ /*
+ * We've taken an instruction abort from userspace and not yet
+ * re-enabled IRQs. If the address is a kernel address, apply
+ * BP hardening prior to enabling IRQs and pre-emption.
+ */
+ if (!is_ttbr0_addr(far))
+ arm64_apply_bp_hardening();
+
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ do_mem_abort(far, esr, regs);
+}
+
+static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ do_fpsimd_acc(esr, regs);
+}
+
+static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ do_sve_acc(esr, regs);
+}
+
+static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ do_fpsimd_exc(esr, regs);
+}
+
+static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ do_sysinstr(esr, regs);
+}
+
+static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ if (!is_ttbr0_addr(instruction_pointer(regs)))
+ arm64_apply_bp_hardening();
+
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ do_sp_pc_abort(far, esr, regs);
+}
+
+static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ do_sp_pc_abort(regs->sp, esr, regs);
+}
+
+static void noinstr el0_undef(struct pt_regs *regs)
+{
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ do_undefinstr(regs);
+}
+
+static void noinstr el0_bti(struct pt_regs *regs)
+{
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ do_bti(regs);
+}
+
+static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ bad_el0_sync(regs, 0, esr);
+}
+
+static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
+{
+ /* Only watchpoints write FAR_EL1, otherwise it's UNKNOWN */
+ unsigned long far = read_sysreg(far_el1);
+
+ enter_from_user_mode();
+ do_debug_exception(far, esr, regs);
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+}
+
+static void noinstr el0_svc(struct pt_regs *regs)
+{
+ enter_from_user_mode();
+ do_el0_svc(regs);
+}
+
+static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ do_ptrauth_fault(regs, esr);
+}
+
+asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_SVC64:
+ el0_svc(regs);
+ break;
+ case ESR_ELx_EC_DABT_LOW:
+ el0_da(regs, esr);
+ break;
+ case ESR_ELx_EC_IABT_LOW:
+ el0_ia(regs, esr);
+ break;
+ case ESR_ELx_EC_FP_ASIMD:
+ el0_fpsimd_acc(regs, esr);
+ break;
+ case ESR_ELx_EC_SVE:
+ el0_sve_acc(regs, esr);
+ break;
+ case ESR_ELx_EC_FP_EXC64:
+ el0_fpsimd_exc(regs, esr);
+ break;
+ case ESR_ELx_EC_SYS64:
+ case ESR_ELx_EC_WFx:
+ el0_sys(regs, esr);
+ break;
+ case ESR_ELx_EC_SP_ALIGN:
+ el0_sp(regs, esr);
+ break;
+ case ESR_ELx_EC_PC_ALIGN:
+ el0_pc(regs, esr);
+ break;
+ case ESR_ELx_EC_UNKNOWN:
+ el0_undef(regs);
+ break;
+ case ESR_ELx_EC_BTI:
+ el0_bti(regs);
+ break;
+ case ESR_ELx_EC_BREAKPT_LOW:
+ case ESR_ELx_EC_SOFTSTP_LOW:
+ case ESR_ELx_EC_WATCHPT_LOW:
+ case ESR_ELx_EC_BRK64:
+ el0_dbg(regs, esr);
+ break;
+ case ESR_ELx_EC_FPAC:
+ el0_fpac(regs, esr);
+ break;
+ default:
+ el0_inv(regs, esr);
+ }
+}
+
+#ifdef CONFIG_COMPAT
+static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode();
+ local_daif_restore(DAIF_PROCCTX);
+ do_cp15instr(esr, regs);
+}
+
+static void noinstr el0_svc_compat(struct pt_regs *regs)
+{
+ enter_from_user_mode();
+ do_el0_svc_compat(regs);
+}
+
+asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_SVC32:
+ el0_svc_compat(regs);
+ break;
+ case ESR_ELx_EC_DABT_LOW:
+ el0_da(regs, esr);
+ break;
+ case ESR_ELx_EC_IABT_LOW:
+ el0_ia(regs, esr);
+ break;
+ case ESR_ELx_EC_FP_ASIMD:
+ el0_fpsimd_acc(regs, esr);
+ break;
+ case ESR_ELx_EC_FP_EXC32:
+ el0_fpsimd_exc(regs, esr);
+ break;
+ case ESR_ELx_EC_PC_ALIGN:
+ el0_pc(regs, esr);
+ break;
+ case ESR_ELx_EC_UNKNOWN:
+ case ESR_ELx_EC_CP14_MR:
+ case ESR_ELx_EC_CP14_LS:
+ case ESR_ELx_EC_CP14_64:
+ el0_undef(regs);
+ break;
+ case ESR_ELx_EC_CP15_32:
+ case ESR_ELx_EC_CP15_64:
+ el0_cp15(regs, esr);
+ break;
+ case ESR_ELx_EC_BREAKPT_LOW:
+ case ESR_ELx_EC_SOFTSTP_LOW:
+ case ESR_ELx_EC_WATCHPT_LOW:
+ case ESR_ELx_EC_BKPT32:
+ el0_dbg(regs, esr);
+ break;
+ default:
+ el0_inv(regs, esr);
+ }
+}
+#endif /* CONFIG_COMPAT */
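
Both dispatchers above key off the exception class (EC) field of ESR_EL1,
bits [31:26] per the Arm ARM. A standalone sketch of that decode (macro names
here are illustrative, not the kernel's):

    #include <stdint.h>

    #define ESR_EC_SHIFT 26
    #define ESR_EC_MASK  0x3fu

    #define EC_SVC64     0x15u   /* SVC from AArch64 -> el0_svc()   */
    #define EC_DABT_LOW  0x24u   /* data abort from EL0 -> el0_da() */

    static inline uint32_t esr_ec(uint64_t esr)
    {
            /* shift the EC field down, then mask off everything else */
            return (uint32_t)(esr >> ESR_EC_SHIFT) & ESR_EC_MASK;
    }
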
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 0f24eae..2ca395c 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -16,34 +16,59 @@
*
* x0 - pointer to struct fpsimd_state
*/
-ENTRY(fpsimd_save_state)
+SYM_FUNC_START(fpsimd_save_state)
fpsimd_save x0, 8
ret
-ENDPROC(fpsimd_save_state)
+SYM_FUNC_END(fpsimd_save_state)
/*
* Load the FP registers.
*
* x0 - pointer to struct fpsimd_state
*/
-ENTRY(fpsimd_load_state)
+SYM_FUNC_START(fpsimd_load_state)
fpsimd_restore x0, 8
ret
-ENDPROC(fpsimd_load_state)
+SYM_FUNC_END(fpsimd_load_state)
#ifdef CONFIG_ARM64_SVE
-ENTRY(sve_save_state)
+
+SYM_FUNC_START(sve_save_state)
sve_save 0, x1, 2
ret
-ENDPROC(sve_save_state)
+SYM_FUNC_END(sve_save_state)
-ENTRY(sve_load_state)
+SYM_FUNC_START(sve_load_state)
sve_load 0, x1, x2, 3, x4
ret
-ENDPROC(sve_load_state)
+SYM_FUNC_END(sve_load_state)
-ENTRY(sve_get_vl)
+SYM_FUNC_START(sve_get_vl)
_sve_rdvl 0, 1
ret
-ENDPROC(sve_get_vl)
+SYM_FUNC_END(sve_get_vl)
+
+/*
+ * Load SVE state from FPSIMD state.
+ *
+ * x0 = pointer to struct fpsimd_state
+ * x1 = VQ - 1
+ *
+ * Each SVE vector will be loaded with the first 128 bits taken from FPSIMD
+ * and the rest zeroed. All the other SVE registers will be zeroed.
+ */
+SYM_FUNC_START(sve_load_from_fpsimd_state)
+ sve_load_vq x1, x2, x3
+ fpsimd_restore x0, 8
+ _for n, 0, 15, _sve_pfalse \n
+ _sve_wrffr 0
+ ret
+SYM_FUNC_END(sve_load_from_fpsimd_state)
+
+/* Zero all SVE registers but the first 128 bits of each vector */
+SYM_FUNC_START(sve_flush_live)
+ sve_flush
+ ret
+SYM_FUNC_END(sve_flush_live)
+
#endif /* CONFIG_ARM64_SVE */
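
The "x1 = VQ - 1" convention above reflects SVE's sizing model: a vector is
VQ quadwords of 128 bits, i.e. VQ * 16 bytes. A small sketch of the
arithmetic (helper names are illustrative):

    #define SVE_VQ_BYTES 16    /* one quadword = 128 bits */

    /* vector length in bytes for a given VQ */
    static inline unsigned int vl_from_vq(unsigned int vq)
    {
            return vq * SVE_VQ_BYTES;
    }

    /* the value handed to sve_load_from_fpsimd_state in x1 */
    static inline unsigned int vq_minus_one(unsigned int vl)
    {
            return vl / SVE_VQ_BYTES - 1;
    }
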
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 33d003d..67f68c9 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -7,10 +7,143 @@
*/
#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
#include <asm/assembler.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+/*
+ * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before
+ * the regular function prologue. For an enabled callsite, ftrace_init_nop() and
+ * ftrace_make_call() have patched those NOPs to:
+ *
+ * MOV X9, LR
+ * BL <entry>
+ *
+ * ... where <entry> is either ftrace_caller or ftrace_regs_caller.
+ *
+ * Each instrumented function follows the AAPCS, so here x0-x8 and x18-x30 are
+ * live (x18 holds the Shadow Call Stack pointer), and x9-x17 are safe to
+ * clobber.
+ *
+ * We save the callsite's context into a pt_regs before invoking any ftrace
+ * callbacks. So that we can get a sensible backtrace, we create a stack record
+ * for the callsite and the ftrace entry assembly. This is not sufficient for
+ * reliable stacktrace: until we create the callsite stack record, its caller
+ * is missing from the LR and existing chain of frame records.
+ */
+ .macro ftrace_regs_entry, allregs=0
+ /* Make room for pt_regs, plus a callee frame */
+ sub sp, sp, #(S_FRAME_SIZE + 16)
+
+ /* Save function arguments (and x9 for simplicity) */
+ stp x0, x1, [sp, #S_X0]
+ stp x2, x3, [sp, #S_X2]
+ stp x4, x5, [sp, #S_X4]
+ stp x6, x7, [sp, #S_X6]
+ stp x8, x9, [sp, #S_X8]
+
+ /* Optionally save the callee-saved registers, always save the FP */
+ .if \allregs == 1
+ stp x10, x11, [sp, #S_X10]
+ stp x12, x13, [sp, #S_X12]
+ stp x14, x15, [sp, #S_X14]
+ stp x16, x17, [sp, #S_X16]
+ stp x18, x19, [sp, #S_X18]
+ stp x20, x21, [sp, #S_X20]
+ stp x22, x23, [sp, #S_X22]
+ stp x24, x25, [sp, #S_X24]
+ stp x26, x27, [sp, #S_X26]
+ stp x28, x29, [sp, #S_X28]
+ .else
+ str x29, [sp, #S_FP]
+ .endif
+
+ /* Save the callsite's SP and LR */
+ add x10, sp, #(S_FRAME_SIZE + 16)
+ stp x9, x10, [sp, #S_LR]
+
+ /* Save the PC after the ftrace callsite */
+ str x30, [sp, #S_PC]
+
+ /* Create a frame record for the callsite above pt_regs */
+ stp x29, x9, [sp, #S_FRAME_SIZE]
+ add x29, sp, #S_FRAME_SIZE
+
+ /* Create our frame record within pt_regs. */
+ stp x29, x30, [sp, #S_STACKFRAME]
+ add x29, sp, #S_STACKFRAME
+ .endm
+
+SYM_CODE_START(ftrace_regs_caller)
+#ifdef BTI_C
+ BTI_C
+#endif
+ ftrace_regs_entry 1
+ b ftrace_common
+SYM_CODE_END(ftrace_regs_caller)
+
+SYM_CODE_START(ftrace_caller)
+#ifdef BTI_C
+ BTI_C
+#endif
+ ftrace_regs_entry 0
+ b ftrace_common
+SYM_CODE_END(ftrace_caller)
+
+SYM_CODE_START(ftrace_common)
+ sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
+ mov x1, x9 // parent_ip (callsite's LR)
+ ldr_l x2, function_trace_op // op
+ mov x3, sp // regs
+
+SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
+ bl ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) // ftrace_graph_caller();
+ nop // If enabled, this will be replaced
+ // "b ftrace_graph_caller"
+#endif
+
+/*
+ * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
+ * x19-x29 per the AAPCS, and we created frame records upon entry, so we need
+ * to restore x0-x8, x29, and x30.
+ */
+ftrace_common_return:
+ /* Restore function arguments */
+ ldp x0, x1, [sp]
+ ldp x2, x3, [sp, #S_X2]
+ ldp x4, x5, [sp, #S_X4]
+ ldp x6, x7, [sp, #S_X6]
+ ldr x8, [sp, #S_X8]
+
+ /* Restore the callsite's FP, LR, PC */
+ ldr x29, [sp, #S_FP]
+ ldr x30, [sp, #S_LR]
+ ldr x9, [sp, #S_PC]
+
+ /* Restore the callsite's SP */
+ add sp, sp, #S_FRAME_SIZE + 16
+
+ ret x9
+SYM_CODE_END(ftrace_common)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+SYM_CODE_START(ftrace_graph_caller)
+ ldr x0, [sp, #S_PC]
+ sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
+ add x1, sp, #S_LR // parent_ip (callsite's LR)
+ ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP)
+ bl prepare_ftrace_return
+ b ftrace_common_return
+SYM_CODE_END(ftrace_graph_caller)
+#endif
+
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
/*
* GCC with -pg will put the following code at the beginning of each function:
* mov x0, x30
@@ -92,7 +225,7 @@
* - tracer function to probe instrumented function's entry,
* - ftrace_graph_caller to set up an exit hook
*/
-ENTRY(_mcount)
+SYM_FUNC_START(_mcount)
mcount_enter
ldr_l x2, ftrace_trace_function
@@ -116,7 +249,7 @@
b.ne ftrace_graph_caller // ftrace_graph_caller();
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
mcount_exit
-ENDPROC(_mcount)
+SYM_FUNC_END(_mcount)
EXPORT_SYMBOL(_mcount)
NOKPROBE(_mcount)
@@ -127,9 +260,9 @@
* and later on, the NOP is patched to branch to ftrace_caller() when enabled,
* or back to a NOP when disabled, on a per-function basis.
*/
-ENTRY(_mcount)
+SYM_FUNC_START(_mcount)
ret
-ENDPROC(_mcount)
+SYM_FUNC_END(_mcount)
EXPORT_SYMBOL(_mcount)
NOKPROBE(_mcount)
@@ -142,30 +275,26 @@
* - tracer function to probe instrumented function's entry,
* - ftrace_graph_caller to set up an exit hook
*/
-ENTRY(ftrace_caller)
+SYM_FUNC_START(ftrace_caller)
mcount_enter
mcount_get_pc0 x0 // function's pc
mcount_get_lr x1 // function's lr
-GLOBAL(ftrace_call) // tracer(pc, lr);
+SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) // tracer(pc, lr);
nop // This will be replaced with "bl xxx"
// where xxx can be any kind of tracer.
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-GLOBAL(ftrace_graph_call) // ftrace_graph_caller();
+SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) // ftrace_graph_caller();
nop // If enabled, this will be replaced
// "b ftrace_graph_caller"
#endif
mcount_exit
-ENDPROC(ftrace_caller)
+SYM_FUNC_END(ftrace_caller)
#endif /* CONFIG_DYNAMIC_FTRACE */
-ENTRY(ftrace_stub)
- ret
-ENDPROC(ftrace_stub)
-
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* void ftrace_graph_caller(void)
@@ -176,22 +305,29 @@
* the call stack in order to intercept instrumented function's return path
* and run return_to_handler() later on its exit.
*/
-ENTRY(ftrace_graph_caller)
+SYM_FUNC_START(ftrace_graph_caller)
mcount_get_pc x0 // function's pc
mcount_get_lr_addr x1 // pointer to function's saved lr
mcount_get_parent_fp x2 // parent's fp
bl prepare_ftrace_return // prepare_ftrace_return(pc, &lr, fp)
mcount_exit
-ENDPROC(ftrace_graph_caller)
+SYM_FUNC_END(ftrace_graph_caller)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+SYM_FUNC_START(ftrace_stub)
+ ret
+SYM_FUNC_END(ftrace_stub)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* void return_to_handler(void)
*
* Run ftrace_return_to_handler() before going back to parent.
* @fp is checked against the value passed by ftrace_graph_caller().
*/
-ENTRY(return_to_handler)
+SYM_CODE_START(return_to_handler)
/* save return value regs */
sub sp, sp, #64
stp x0, x1, [sp]
@@ -211,5 +347,5 @@
add sp, sp, #64
ret
-END(return_to_handler)
+SYM_CODE_END(return_to_handler)
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
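
The two patchable NOPs that the DYNAMIC_FTRACE_WITH_REGS code above rewrites
come from building with -fpatchable-function-entry=2. A toy illustration
using the equivalent function attribute (GCC/Clang spelling; treating it as
available outside the kernel build is an assumption):

    /*
     * This emits two NOPs immediately before the prologue of traced();
     * at runtime ftrace patches them to:
     *     MOV X9, LR
     *     BL  ftrace_caller        // or ftrace_regs_caller
     */
    __attribute__((patchable_function_entry(2)))
    int traced(int x)
    {
            return x + 1;
    }
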
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index db13774..d5bc1db 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -14,6 +14,8 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
+#include <asm/asm_pointer_auth.h>
+#include <asm/bug.h>
#include <asm/cpufeature.h>
#include <asm/errno.h>
#include <asm/esr.h>
@@ -22,23 +24,24 @@
#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
+#include <asm/scs.h>
#include <asm/thread_info.h>
#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
/*
- * Context tracking subsystem. Used to instrument transitions
- * between user and kernel mode.
+ * Context tracking and irqflag tracing need to instrument transitions between
+ * user and kernel mode.
*/
- .macro ct_user_exit_irqoff
-#ifdef CONFIG_CONTEXT_TRACKING
+ .macro user_exit_irqoff
+#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
bl enter_from_user_mode
#endif
.endm
- .macro ct_user_enter
-#ifdef CONFIG_CONTEXT_TRACKING
- bl context_tracking_user_enter
+ .macro user_enter_irqoff
+#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
+ bl exit_to_user_mode
#endif
.endm
@@ -59,24 +62,28 @@
.macro kernel_ventry, el, label, regsize = 64
.align 7
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-alternative_if ARM64_UNMAP_KERNEL_AT_EL0
+.Lventry_start\@:
.if \el == 0
+ /*
+ * This must be the first instruction of the EL0 vector entries. It is
+ * skipped by the trampoline vectors, to trigger the cleanup.
+ */
+ b .Lskip_tramp_vectors_cleanup\@
.if \regsize == 64
mrs x30, tpidrro_el0
msr tpidrro_el0, xzr
.else
mov x30, xzr
.endif
+.Lskip_tramp_vectors_cleanup\@:
.endif
-alternative_else_nop_endif
-#endif
sub sp, sp, #S_FRAME_SIZE
#ifdef CONFIG_VMAP_STACK
/*
* Test whether the SP has overflowed, without corrupting a GPR.
- * Task and IRQ stacks are aligned to (1 << THREAD_SHIFT).
+ * Task and IRQ stacks are aligned so that SP & (1 << THREAD_SHIFT)
+ * should always be zero.
*/
add sp, sp, x0 // sp' = sp + x0
sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
@@ -116,19 +123,24 @@
mrs x0, tpidrro_el0
#endif
b el\()\el\()_\label
+.org .Lventry_start\@ + 128 // Did we overflow the ventry slot?
.endm
- .macro tramp_alias, dst, sym
+ .macro tramp_alias, dst, sym, tmp
mov_q \dst, TRAMP_VALIAS
- add \dst, \dst, #(\sym - .entry.tramp.text)
+ adr_l \tmp, \sym
+ add \dst, \dst, \tmp
+ adr_l \tmp, .entry.tramp.text
+ sub \dst, \dst, \tmp
.endm
- // This macro corrupts x0-x3. It is the caller's duty
- // to save/restore them if required.
+ /*
+ * This macro corrupts x0-x3. It is the caller's duty to save/restore
+ * them if required.
+ */
.macro apply_ssbd, state, tmp1, tmp2
-#ifdef CONFIG_ARM64_SSBD
-alternative_cb arm64_enable_wa2_handling
- b .L__asm_ssbd_skip\@
+alternative_cb spectre_v4_patch_fw_mitigation_enable
+ b .L__asm_ssbd_skip\@ // Patched to NOP
alternative_cb_end
ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1
cbz \tmp2, .L__asm_ssbd_skip\@
@@ -136,10 +148,37 @@
tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@
mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2
mov w1, #\state
-alternative_cb arm64_update_smccc_conduit
+alternative_cb smccc_patch_fw_mitigation_conduit
nop // Patched to SMC/HVC #0
alternative_cb_end
.L__asm_ssbd_skip\@:
+ .endm
+
+ /* Check for MTE asynchronous tag check faults */
+ .macro check_mte_async_tcf, tmp, ti_flags
+#ifdef CONFIG_ARM64_MTE
+ .arch_extension lse
+alternative_if_not ARM64_MTE
+ b 1f
+alternative_else_nop_endif
+ mrs_s \tmp, SYS_TFSRE0_EL1
+ tbz \tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f
+ /* Asynchronous TCF occurred for TTBR0 access, set the TI flag */
+ mov \tmp, #_TIF_MTE_ASYNC_FAULT
+ add \ti_flags, tsk, #TSK_TI_FLAGS
+ stset \tmp, [\ti_flags]
+ msr_s SYS_TFSRE0_EL1, xzr
+1:
+#endif
+ .endm
+
+ /* Clear the MTE asynchronous tag check faults */
+ .macro clear_mte_async_tcf
+#ifdef CONFIG_ARM64_MTE
+alternative_if ARM64_MTE
+ dsb ish
+ msr_s SYS_TFSRE0_EL1, xzr
+alternative_else_nop_endif
#endif
.endm
@@ -166,12 +205,23 @@
.if \el == 0
clear_gp_regs
mrs x21, sp_el0
- ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear,
- ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug
- disable_step_tsk x19, x20 // exceptions when scheduling.
+ ldr_this_cpu tsk, __entry_task, x20
+ msr sp_el0, tsk
+ /*
+ * Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions
+ * when scheduling.
+ */
+ ldr x19, [tsk, #TSK_TI_FLAGS]
+ disable_step_tsk x19, x20
+
+ /* Check for asynchronous tag check faults in user space */
+ check_mte_async_tcf x22, x23
apply_ssbd 1, x22, x23
+ ptrauth_keys_install_kernel tsk, x20, x22, x23
+
+ scs_load tsk, x20
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -199,28 +249,9 @@
add x29, sp, #S_STACKFRAME
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
- /*
- * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
- * EL0, there is no need to check the state of TTBR0_EL1 since
- * accesses are always enabled.
- * Note that the meaning of this bit differs from the ARMv8.1 PAN
- * feature as all TTBR0_EL1 accesses are disabled, not just those to
- * user mappings.
- */
-alternative_if ARM64_HAS_PAN
- b 1f // skip TTBR0 PAN
+alternative_if_not ARM64_HAS_PAN
+ bl __swpan_entry_el\el
alternative_else_nop_endif
-
- .if \el != 0
- mrs x21, ttbr0_el1
- tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
- orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
- b.eq 1f // TTBR0 access already disabled
- and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
- .endif
-
- __uaccess_ttbr0_disable x21
-1:
#endif
stp x22, x23, [sp, #S_PC]
@@ -231,19 +262,21 @@
str w21, [sp, #S_SYSCALLNO]
.endif
- /*
- * Set sp_el0 to current thread_info.
- */
- .if \el == 0
- msr sp_el0, tsk
- .endif
-
/* Save pmr */
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
mrs_s x20, SYS_ICC_PMR_EL1
str x20, [sp, #S_PMR_SAVE]
+ mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
+ msr_s SYS_ICC_PMR_EL1, x20
alternative_else_nop_endif
+ /* Re-enable tag checking (TCO set on exception entry) */
+#ifdef CONFIG_ARM64_MTE
+alternative_if ARM64_MTE
+ SET_PSTATE_TCO(0)
+alternative_else_nop_endif
+#endif
+
/*
* Registers that may be useful after this macro is invoked:
*
@@ -269,44 +302,18 @@
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
ldr x20, [sp, #S_PMR_SAVE]
msr_s SYS_ICC_PMR_EL1, x20
- /* Ensure priority change is seen by redistributor */
- dsb sy
+ mrs_s x21, SYS_ICC_CTLR_EL1
+ tbz x21, #6, .L__skip_pmr_sync\@ // Check for ICC_CTLR_EL1.PMHE
+ dsb sy // Ensure priority change is seen by redistributor
+.L__skip_pmr_sync\@:
alternative_else_nop_endif
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
- .if \el == 0
- ct_user_enter
- .endif
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
- /*
- * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
- * PAN bit checking.
- */
-alternative_if ARM64_HAS_PAN
- b 2f // skip TTBR0 PAN
+alternative_if_not ARM64_HAS_PAN
+ bl __swpan_exit_el\el
alternative_else_nop_endif
-
- .if \el != 0
- tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
- .endif
-
- __uaccess_ttbr0_enable x0, x1
-
- .if \el == 0
- /*
- * Enable errata workarounds only if returning to user. The only
- * workaround currently required for TTBR0_EL1 changes are for the
- * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
- * corruption).
- */
- bl post_ttbr_update_workaround
- .endif
-1:
- .if \el != 0
- and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit
- .endif
-2:
#endif
.if \el == 0
@@ -326,21 +333,11 @@
alternative_else_nop_endif
#endif
3:
-#ifdef CONFIG_ARM64_ERRATUM_1418040
-alternative_if_not ARM64_WORKAROUND_1418040
- b 4f
-alternative_else_nop_endif
- /*
- * if (x22.mode32 == cntkctl_el1.el0vcten)
- * cntkctl_el1.el0vcten = ~cntkctl_el1.el0vcten
- */
- mrs x1, cntkctl_el1
- eon x0, x1, x22, lsr #3
- tbz x0, #1, 4f
- eor x1, x1, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN
- msr cntkctl_el1, x1
-4:
-#endif
+ scs_save tsk, x0
+
+ /* No kernel C function calls after this as user keys are set. */
+ ptrauth_keys_install_user tsk, x0, x1, x2
+
apply_ssbd 0, x0, x1
.endif
@@ -361,28 +358,82 @@
ldp x24, x25, [sp, #16 * 12]
ldp x26, x27, [sp, #16 * 13]
ldp x28, x29, [sp, #16 * 14]
- ldr lr, [sp, #S_LR]
- add sp, sp, #S_FRAME_SIZE // restore sp
.if \el == 0
-alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
+alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
+ ldr lr, [sp, #S_LR]
+ add sp, sp, #S_FRAME_SIZE // restore sp
+ eret
+alternative_else_nop_endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
- bne 5f
- msr far_el1, x30
- tramp_alias x30, tramp_exit_native
+ bne 4f
+ msr far_el1, x29
+ tramp_alias x30, tramp_exit_native, x29
br x30
-5:
- tramp_alias x30, tramp_exit_compat
+4:
+ tramp_alias x30, tramp_exit_compat, x29
br x30
#endif
.else
+ ldr lr, [sp, #S_LR]
+ add sp, sp, #S_FRAME_SIZE // restore sp
+
+ /* Ensure any device/NC reads complete */
+ alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412
+
eret
.endif
sb
.endm
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
+ * EL0, there is no need to check the state of TTBR0_EL1 since
+ * accesses are always enabled.
+ * Note that the meaning of this bit differs from the ARMv8.1 PAN
+ * feature as all TTBR0_EL1 accesses are disabled, not just those to
+ * user mappings.
+ */
+SYM_CODE_START_LOCAL(__swpan_entry_el1)
+ mrs x21, ttbr0_el1
+ tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
+ orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
+ b.eq 1f // TTBR0 access already disabled
+ and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
+SYM_INNER_LABEL(__swpan_entry_el0, SYM_L_LOCAL)
+ __uaccess_ttbr0_disable x21
+1: ret
+SYM_CODE_END(__swpan_entry_el1)
+
+ /*
+ * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
+ * PAN bit checking.
+ */
+SYM_CODE_START_LOCAL(__swpan_exit_el1)
+ tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
+ __uaccess_ttbr0_enable x0, x1
+1: and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit
+ ret
+SYM_CODE_END(__swpan_exit_el1)
+
+SYM_CODE_START_LOCAL(__swpan_exit_el0)
+ __uaccess_ttbr0_enable x0, x1
+ /*
+ * Enable errata workarounds only if returning to user. The only
+ * workaround currently required for TTBR0_EL1 changes are for the
+ * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
+ * corruption).
+ */
+ b post_ttbr_update_workaround
+SYM_CODE_END(__swpan_exit_el0)
+#endif
+
.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x24, scs_sp // preserve the original shadow stack
+#endif
/*
* Compare sp with the base of the task stack.
@@ -400,15 +451,25 @@
/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ adr_this_cpu scs_sp, irq_shadow_call_stack, x26
+#endif
+
9998:
.endm
/*
- * x19 should be preserved between irq_stack_entry and
- * irq_stack_exit.
+ * The callee-saved regs (x19-x29) should be preserved between
+ * irq_stack_entry and irq_stack_exit, but note that kernel_entry
+ * uses x20-x23 to store data for later use.
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov scs_sp, x24
+#endif
.endm
/* GPRs used by entry code */
@@ -417,8 +478,8 @@
/*
* Interrupt handling.
*/
- .macro irq_handler
- ldr_l x1, handle_arch_irq
+ .macro irq_handler, handler:req
+ ldr_l x1, \handler
mov x0, sp
irq_stack_entry
blr x1
@@ -448,13 +509,41 @@
#endif
.endm
- .macro gic_prio_irq_setup, pmr:req, tmp:req
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET
- msr_s SYS_ICC_PMR_EL1, \tmp
- alternative_else_nop_endif
+ .macro el1_interrupt_handler, handler:req
+ enable_da_f
+
+ mov x0, sp
+ bl enter_el1_irq_or_nmi
+
+ irq_handler \handler
+
+#ifdef CONFIG_PREEMPTION
+ ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count
+alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+ /*
+ * DA_F were cleared at the start of handling. If anything is set in DAIF,
+ * we came back from an NMI, so skip preemption
+ */
+ mrs x0, daif
+ orr x24, x24, x0
+alternative_else_nop_endif
+ cbnz x24, 1f // preempt count != 0 || NMI return path
+ bl arm64_preempt_schedule_irq // irq en/disable is done inside
+1:
#endif
+
+ mov x0, sp
+ bl exit_el1_irq_or_nmi
+ .endm
+
+ .macro el0_interrupt_handler, handler:req
+ user_exit_irqoff
+ enable_da_f
+
+ tbz x22, #55, 1f
+ bl do_el0_irq_bp_hardening
+1:
+ irq_handler \handler
.endm
.text
@@ -465,7 +554,7 @@
.pushsection ".entry.text", "ax"
.align 11
-ENTRY(vectors)
+SYM_CODE_START(vectors)
kernel_ventry 1, sync_invalid // Synchronous EL1t
kernel_ventry 1, irq_invalid // IRQ EL1t
kernel_ventry 1, fiq_invalid // FIQ EL1t
@@ -492,7 +581,7 @@
kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0
kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0
#endif
-END(vectors)
+SYM_CODE_END(vectors)
#ifdef CONFIG_VMAP_STACK
/*
@@ -534,490 +623,159 @@
ASM_BUG()
.endm
-el0_sync_invalid:
+SYM_CODE_START_LOCAL(el0_sync_invalid)
inv_entry 0, BAD_SYNC
-ENDPROC(el0_sync_invalid)
+SYM_CODE_END(el0_sync_invalid)
-el0_irq_invalid:
+SYM_CODE_START_LOCAL(el0_irq_invalid)
inv_entry 0, BAD_IRQ
-ENDPROC(el0_irq_invalid)
+SYM_CODE_END(el0_irq_invalid)
-el0_fiq_invalid:
+SYM_CODE_START_LOCAL(el0_fiq_invalid)
inv_entry 0, BAD_FIQ
-ENDPROC(el0_fiq_invalid)
+SYM_CODE_END(el0_fiq_invalid)
-el0_error_invalid:
+SYM_CODE_START_LOCAL(el0_error_invalid)
inv_entry 0, BAD_ERROR
-ENDPROC(el0_error_invalid)
+SYM_CODE_END(el0_error_invalid)
#ifdef CONFIG_COMPAT
-el0_fiq_invalid_compat:
+SYM_CODE_START_LOCAL(el0_fiq_invalid_compat)
inv_entry 0, BAD_FIQ, 32
-ENDPROC(el0_fiq_invalid_compat)
+SYM_CODE_END(el0_fiq_invalid_compat)
#endif
-el1_sync_invalid:
+SYM_CODE_START_LOCAL(el1_sync_invalid)
inv_entry 1, BAD_SYNC
-ENDPROC(el1_sync_invalid)
+SYM_CODE_END(el1_sync_invalid)
-el1_irq_invalid:
+SYM_CODE_START_LOCAL(el1_irq_invalid)
inv_entry 1, BAD_IRQ
-ENDPROC(el1_irq_invalid)
+SYM_CODE_END(el1_irq_invalid)
-el1_fiq_invalid:
+SYM_CODE_START_LOCAL(el1_fiq_invalid)
inv_entry 1, BAD_FIQ
-ENDPROC(el1_fiq_invalid)
+SYM_CODE_END(el1_fiq_invalid)
-el1_error_invalid:
+SYM_CODE_START_LOCAL(el1_error_invalid)
inv_entry 1, BAD_ERROR
-ENDPROC(el1_error_invalid)
+SYM_CODE_END(el1_error_invalid)
/*
* EL1 mode handlers.
*/
.align 6
-el1_sync:
+SYM_CODE_START_LOCAL_NOALIGN(el1_sync)
kernel_entry 1
- mrs x1, esr_el1 // read the syndrome register
- lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class
- cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1
- b.eq el1_da
- cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1
- b.eq el1_ia
- cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
- b.eq el1_undef
- cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el1_pc
- cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1
- b.eq el1_undef
- cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1
- b.ge el1_dbg
- b el1_inv
-
-el1_ia:
- /*
- * Fall through to the Data abort case
- */
-el1_da:
- /*
- * Data abort handling
- */
- mrs x3, far_el1
- inherit_daif pstate=x23, tmp=x2
- untagged_addr x0, x3
- mov x2, sp // struct pt_regs
- bl do_mem_abort
-
- kernel_exit 1
-el1_pc:
- /*
- * PC alignment exception handling. We don't handle SP alignment faults,
- * since we will have hit a recursive exception when trying to push the
- * initial pt_regs.
- */
- mrs x0, far_el1
- inherit_daif pstate=x23, tmp=x2
- mov x2, sp
- bl do_sp_pc_abort
- ASM_BUG()
-el1_undef:
- /*
- * Undefined instruction
- */
- inherit_daif pstate=x23, tmp=x2
mov x0, sp
- bl do_undefinstr
+ bl el1_sync_handler
kernel_exit 1
-el1_dbg:
- /*
- * Debug exception handling
- */
- cmp x24, #ESR_ELx_EC_BRK64 // if BRK64
- cinc x24, x24, eq // set bit '0'
- tbz x24, #0, el1_inv // EL1 only
- gic_prio_kentry_setup tmp=x3
- mrs x0, far_el1
- mov x2, sp // struct pt_regs
- bl do_debug_exception
- kernel_exit 1
-el1_inv:
- // TODO: add support for undefined instructions in kernel mode
- inherit_daif pstate=x23, tmp=x2
- mov x0, sp
- mov x2, x1
- mov x1, #BAD_SYNC
- bl bad_mode
- ASM_BUG()
-ENDPROC(el1_sync)
+SYM_CODE_END(el1_sync)
.align 6
-el1_irq:
+SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
kernel_entry 1
- gic_prio_irq_setup pmr=x20, tmp=x1
- enable_da_f
-
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- test_irqs_unmasked res=x0, pmr=x20
- cbz x0, 1f
- bl asm_nmi_enter
-1:
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_off
-#endif
-
- irq_handler
-
-#ifdef CONFIG_PREEMPT
- ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- /*
- * DA_F were cleared at start of handling. If anything is set in DAIF,
- * we come back from an NMI, so skip preemption
- */
- mrs x0, daif
- orr x24, x24, x0
-alternative_else_nop_endif
- cbnz x24, 1f // preempt count != 0 || NMI return path
- bl arm64_preempt_schedule_irq // irq en/disable is done inside
-1:
-#endif
-
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- /*
- * When using IRQ priority masking, we can get spurious interrupts while
- * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a
- * section with interrupts disabled. Skip tracing in those cases.
- */
- test_irqs_unmasked res=x0, pmr=x20
- cbz x0, 1f
- bl asm_nmi_exit
-1:
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- test_irqs_unmasked res=x0, pmr=x20
- cbnz x0, 1f
-#endif
- bl trace_hardirqs_on
-1:
-#endif
-
+ el1_interrupt_handler handle_arch_irq
kernel_exit 1
-ENDPROC(el1_irq)
+SYM_CODE_END(el1_irq)
/*
* EL0 mode handlers.
*/
.align 6
-el0_sync:
+SYM_CODE_START_LOCAL_NOALIGN(el0_sync)
kernel_entry 0
- mrs x25, esr_el1 // read the syndrome register
- lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class
- cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state
- b.eq el0_svc
- cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0
- b.eq el0_da
- cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0
- b.eq el0_ia
- cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access
- b.eq el0_fpsimd_acc
- cmp x24, #ESR_ELx_EC_SVE // SVE access
- b.eq el0_sve_acc
- cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception
- b.eq el0_fpsimd_exc
- cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
- ccmp x24, #ESR_ELx_EC_WFx, #4, ne
- b.eq el0_sys
- cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
- b.eq el0_sp
- cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el0_pc
- cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0
- b.ge el0_dbg
- b el0_inv
+ mov x0, sp
+ bl el0_sync_handler
+ b ret_to_user
+SYM_CODE_END(el0_sync)
#ifdef CONFIG_COMPAT
.align 6
-el0_sync_compat:
+SYM_CODE_START_LOCAL_NOALIGN(el0_sync_compat)
kernel_entry 0, 32
- mrs x25, esr_el1 // read the syndrome register
- lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class
- cmp x24, #ESR_ELx_EC_SVC32 // SVC in 32-bit state
- b.eq el0_svc_compat
- cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0
- b.eq el0_da
- cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0
- b.eq el0_ia
- cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access
- b.eq el0_fpsimd_acc
- cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception
- b.eq el0_fpsimd_exc
- cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el0_pc
- cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap
- b.eq el0_cp15
- cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap
- b.eq el0_cp15
- cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_CP14_64 // CP14 MRRC/MCRR trap
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0
- b.ge el0_dbg
- b el0_inv
-el0_svc_compat:
- gic_prio_kentry_setup tmp=x1
mov x0, sp
- bl el0_svc_compat_handler
+ bl el0_sync_compat_handler
b ret_to_user
+SYM_CODE_END(el0_sync_compat)
.align 6
-el0_irq_compat:
+SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat)
kernel_entry 0, 32
b el0_irq_naked
+SYM_CODE_END(el0_irq_compat)
-el0_error_compat:
+SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat)
kernel_entry 0, 32
b el0_error_naked
-
-el0_cp15:
- /*
- * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_cp15instr
- b ret_to_user
+SYM_CODE_END(el0_error_compat)
#endif
-el0_da:
- /*
- * Data abort handling
- */
- mrs x26, far_el1
- ct_user_exit_irqoff
- enable_daif
- untagged_addr x0, x26
- mov x1, x25
- mov x2, sp
- bl do_mem_abort
- b ret_to_user
-el0_ia:
- /*
- * Instruction abort handling
- */
- mrs x26, far_el1
- gic_prio_kentry_setup tmp=x0
- ct_user_exit_irqoff
- enable_da_f
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_off
-#endif
- mov x0, x26
- mov x1, x25
- mov x2, sp
- bl do_el0_ia_bp_hardening
- b ret_to_user
-el0_fpsimd_acc:
- /*
- * Floating Point or Advanced SIMD access
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_fpsimd_acc
- b ret_to_user
-el0_sve_acc:
- /*
- * Scalable Vector Extension access
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_sve_acc
- b ret_to_user
-el0_fpsimd_exc:
- /*
- * Floating Point, Advanced SIMD or SVE exception
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_fpsimd_exc
- b ret_to_user
-el0_sp:
- ldr x26, [sp, #S_SP]
- b el0_sp_pc
-el0_pc:
- mrs x26, far_el1
-el0_sp_pc:
- /*
- * Stack or PC alignment exception handling
- */
- gic_prio_kentry_setup tmp=x0
- ct_user_exit_irqoff
- enable_da_f
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_off
-#endif
- mov x0, x26
- mov x1, x25
- mov x2, sp
- bl do_sp_pc_abort
- b ret_to_user
-el0_undef:
- /*
- * Undefined instruction
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, sp
- bl do_undefinstr
- b ret_to_user
-el0_sys:
- /*
- * System instructions, for trapped cache maintenance instructions
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_sysinstr
- b ret_to_user
-el0_dbg:
- /*
- * Debug exception handling
- */
- tbnz x24, #0, el0_inv // EL0 only
- mrs x24, far_el1
- gic_prio_kentry_setup tmp=x3
- ct_user_exit_irqoff
- mov x0, x24
- mov x1, x25
- mov x2, sp
- bl do_debug_exception
- enable_da_f
- b ret_to_user
-el0_inv:
- ct_user_exit_irqoff
- enable_daif
- mov x0, sp
- mov x1, #BAD_SYNC
- mov x2, x25
- bl bad_el0_sync
- b ret_to_user
-ENDPROC(el0_sync)
-
.align 6
-el0_irq:
+SYM_CODE_START_LOCAL_NOALIGN(el0_irq)
kernel_entry 0
el0_irq_naked:
- gic_prio_irq_setup pmr=x20, tmp=x0
- ct_user_exit_irqoff
- enable_da_f
-
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_off
-#endif
-
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
- tbz x22, #55, 1f
- bl do_el0_irq_bp_hardening
-1:
-#endif
- irq_handler
-
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_on
-#endif
+ el0_interrupt_handler handle_arch_irq
b ret_to_user
-ENDPROC(el0_irq)
+SYM_CODE_END(el0_irq)
-el1_error:
+SYM_CODE_START_LOCAL(el1_error)
kernel_entry 1
mrs x1, esr_el1
- gic_prio_kentry_setup tmp=x2
enable_dbg
mov x0, sp
bl do_serror
kernel_exit 1
-ENDPROC(el1_error)
+SYM_CODE_END(el1_error)
-el0_error:
+SYM_CODE_START_LOCAL(el0_error)
kernel_entry 0
el0_error_naked:
mrs x25, esr_el1
- gic_prio_kentry_setup tmp=x2
- ct_user_exit_irqoff
+ user_exit_irqoff
enable_dbg
mov x0, sp
mov x1, x25
bl do_serror
enable_da_f
b ret_to_user
-ENDPROC(el0_error)
+SYM_CODE_END(el0_error)
+
+/*
+ * "slow" syscall return path.
+ */
+SYM_CODE_START_LOCAL(ret_to_user)
+ disable_daif
+ gic_prio_kentry_setup tmp=x3
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl trace_hardirqs_off
+#endif
+ ldr x19, [tsk, #TSK_TI_FLAGS]
+ and x2, x19, #_TIF_WORK_MASK
+ cbnz x2, work_pending
+finish_ret_to_user:
+ user_enter_irqoff
+ /* Ignore asynchronous tag check faults in the uaccess routines */
+ clear_mte_async_tcf
+ enable_step_tsk x19, x2
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ bl stackleak_erase
+#endif
+ kernel_exit 0
/*
* Ok, we need to do extra processing, enter the slow path.
*/
work_pending:
mov x0, sp // 'regs'
+ mov x1, x19
bl do_notify_resume
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_on // enabled while in userspace
-#endif
- ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step
+ ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step
b finish_ret_to_user
-/*
- * "slow" syscall return path.
- */
-ret_to_user:
- disable_daif
- gic_prio_kentry_setup tmp=x3
- ldr x1, [tsk, #TSK_TI_FLAGS]
- and x2, x1, #_TIF_WORK_MASK
- cbnz x2, work_pending
-finish_ret_to_user:
- enable_step_tsk x1, x2
-#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
- bl stackleak_erase
-#endif
- kernel_exit 0
-ENDPROC(ret_to_user)
-
-/*
- * SVC handler.
- */
- .align 6
-el0_svc:
- gic_prio_kentry_setup tmp=x1
- mov x0, sp
- bl el0_svc_handler
- b ret_to_user
-ENDPROC(el0_svc)
+SYM_CODE_END(ret_to_user)
.popsection // .entry.text
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-/*
- * Exception vectors trampoline.
- */
- .pushsection ".entry.tramp.text", "ax"
-
// Move from tramp_pg_dir to swapper_pg_dir
.macro tramp_map_kernel, tmp
mrs \tmp, ttbr1_el1
@@ -1051,12 +809,47 @@
*/
.endm
- .macro tramp_ventry, regsize = 64
+ .macro tramp_data_page dst
+ adr_l \dst, .entry.tramp.text
+ sub \dst, \dst, PAGE_SIZE
+ .endm
+
+ .macro tramp_data_read_var dst, var
+#ifdef CONFIG_RANDOMIZE_BASE
+ tramp_data_page \dst
+ add \dst, \dst, #:lo12:__entry_tramp_data_\var
+ ldr \dst, [\dst]
+#else
+ ldr \dst, =\var
+#endif
+ .endm
+
+#define BHB_MITIGATION_NONE 0
+#define BHB_MITIGATION_LOOP 1
+#define BHB_MITIGATION_FW 2
+#define BHB_MITIGATION_INSN 3
+
+ .macro tramp_ventry, vector_start, regsize, kpti, bhb
.align 7
1:
.if \regsize == 64
msr tpidrro_el0, x30 // Restored in kernel_ventry
.endif
+
+ .if \bhb == BHB_MITIGATION_LOOP
+ /*
+ * This sequence must appear before the first indirect branch, i.e. the
+ * ret out of tramp_ventry. It appears here because x30 is free.
+ */
+ __mitigate_spectre_bhb_loop x30
+ .endif // \bhb == BHB_MITIGATION_LOOP
+
+ .if \bhb == BHB_MITIGATION_INSN
+ clearbhb
+ isb
+ .endif // \bhb == BHB_MITIGATION_INSN
+
+ .if \kpti == 1
/*
* Defend against branch aliasing attacks by pushing a dummy
* entry onto the return stack and using a RET instruction to
@@ -1066,69 +859,142 @@
b .
2:
tramp_map_kernel x30
-#ifdef CONFIG_RANDOMIZE_BASE
- adr x30, tramp_vectors + PAGE_SIZE
alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003
- ldr x30, [x30]
-#else
- ldr x30, =vectors
-#endif
+ tramp_data_read_var x30, vectors
alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM
- prfm plil1strm, [x30, #(1b - tramp_vectors)]
+ prfm plil1strm, [x30, #(1b - \vector_start)]
alternative_else_nop_endif
+
msr vbar_el1, x30
- add x30, x30, #(1b - tramp_vectors)
isb
+ .else
+ ldr x30, =vectors
+ .endif // \kpti == 1
+
+ .if \bhb == BHB_MITIGATION_FW
+ /*
+ * The firmware sequence must appear before the first indirect branch,
+ * i.e. the ret out of tramp_ventry, but it also needs the stack to be
+ * mapped to save/restore the registers the SMC clobbers.
+ */
+ __mitigate_spectre_bhb_fw
+ .endif // \bhb == BHB_MITIGATION_FW
+
+ add x30, x30, #(1b - \vector_start + 4)
ret
+.org 1b + 128 // Did we overflow the ventry slot?
.endm
.macro tramp_exit, regsize = 64
- adr x30, tramp_vectors
+ tramp_data_read_var x30, this_cpu_vector
+ this_cpu_offset x29
+ ldr x30, [x30, x29]
+
msr vbar_el1, x30
- tramp_unmap_kernel x30
+ ldr lr, [sp, #S_LR]
+ tramp_unmap_kernel x29
.if \regsize == 64
- mrs x30, far_el1
+ mrs x29, far_el1
.endif
+ add sp, sp, #S_FRAME_SIZE // restore sp
eret
sb
.endm
- .align 11
-ENTRY(tramp_vectors)
+ .macro generate_tramp_vector, kpti, bhb
+.Lvector_start\@:
.space 0x400
- tramp_ventry
- tramp_ventry
- tramp_ventry
- tramp_ventry
+ .rept 4
+ tramp_ventry .Lvector_start\@, 64, \kpti, \bhb
+ .endr
+ .rept 4
+ tramp_ventry .Lvector_start\@, 32, \kpti, \bhb
+ .endr
+ .endm
- tramp_ventry 32
- tramp_ventry 32
- tramp_ventry 32
- tramp_ventry 32
-END(tramp_vectors)
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+/*
+ * Exception vectors trampoline.
+ * The order must match __bp_harden_el1_vectors and the
+ * arm64_bp_harden_el1_vectors enum.
+ */
+ .pushsection ".entry.tramp.text", "ax"
+ .align 11
+SYM_CODE_START_NOALIGN(tramp_vectors)
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP
+ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW
+ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_INSN
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE
+SYM_CODE_END(tramp_vectors)
-ENTRY(tramp_exit_native)
+SYM_CODE_START(tramp_exit_native)
tramp_exit
-END(tramp_exit_native)
+SYM_CODE_END(tramp_exit_native)
-ENTRY(tramp_exit_compat)
+SYM_CODE_START(tramp_exit_compat)
tramp_exit 32
-END(tramp_exit_compat)
+SYM_CODE_END(tramp_exit_compat)
.ltorg
.popsection // .entry.tramp.text
#ifdef CONFIG_RANDOMIZE_BASE
.pushsection ".rodata", "a"
.align PAGE_SHIFT
- .globl __entry_tramp_data_start
-__entry_tramp_data_start:
+SYM_DATA_START(__entry_tramp_data_start)
+__entry_tramp_data_vectors:
.quad vectors
+#ifdef CONFIG_ARM_SDE_INTERFACE
+__entry_tramp_data___sdei_asm_handler:
+ .quad __sdei_asm_handler
+#endif /* CONFIG_ARM_SDE_INTERFACE */
+__entry_tramp_data_this_cpu_vector:
+ .quad this_cpu_vector
+SYM_DATA_END(__entry_tramp_data_start)
.popsection // .rodata
#endif /* CONFIG_RANDOMIZE_BASE */
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
/*
+ * Exception vectors for spectre mitigations on entry from EL1 when
+ * kpti is not in use.
+ */
+ .macro generate_el1_vector, bhb
+.Lvector_start\@:
+ kernel_ventry 1, sync_invalid // Synchronous EL1t
+ kernel_ventry 1, irq_invalid // IRQ EL1t
+ kernel_ventry 1, fiq_invalid // FIQ EL1t
+ kernel_ventry 1, error_invalid // Error EL1t
+
+ kernel_ventry 1, sync // Synchronous EL1h
+ kernel_ventry 1, irq // IRQ EL1h
+ kernel_ventry 1, fiq_invalid // FIQ EL1h
+ kernel_ventry 1, error // Error EL1h
+
+ .rept 4
+ tramp_ventry .Lvector_start\@, 64, 0, \bhb
+ .endr
+ .rept 4
+ tramp_ventry .Lvector_start\@, 32, 0, \bhb
+ .endr
+ .endm
+
+/* The order must match tramp_vecs and the arm64_bp_harden_el1_vectors enum. */
+ .pushsection ".entry.text", "ax"
+ .align 11
+SYM_CODE_START(__bp_harden_el1_vectors)
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+ generate_el1_vector bhb=BHB_MITIGATION_LOOP
+ generate_el1_vector bhb=BHB_MITIGATION_FW
+ generate_el1_vector bhb=BHB_MITIGATION_INSN
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+SYM_CODE_END(__bp_harden_el1_vectors)
+ .popsection
+
+
+/*
* Register switch for AArch64. The callee-saved registers need to be saved
* and restored. On entry:
* x0 = previous task_struct (must be preserved across the switch)
@@ -1136,7 +1002,7 @@
* Previous and next are guaranteed not to be the same.
*
*/
-ENTRY(cpu_switch_to)
+SYM_FUNC_START(cpu_switch_to)
mov x10, #THREAD_CPU_CONTEXT
add x8, x0, x10
mov x9, sp
@@ -1157,21 +1023,24 @@
ldr lr, [x8]
mov sp, x9
msr sp_el0, x1
+ ptrauth_keys_install_kernel x1, x8, x9, x10
+ scs_save x0, x8
+ scs_load x1, x8
ret
-ENDPROC(cpu_switch_to)
+SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
/*
* This is how we return from a fork.
*/
-ENTRY(ret_from_fork)
+SYM_CODE_START(ret_from_fork)
bl schedule_tail
cbz x19, 1f // not a kernel thread
mov x0, x20
blr x19
1: get_current_task tsk
b ret_to_user
-ENDPROC(ret_from_fork)
+SYM_CODE_END(ret_from_fork)
NOKPROBE(ret_from_fork)
#ifdef CONFIG_ARM_SDE_INTERFACE
@@ -1200,7 +1069,7 @@
*/
.ltorg
.pushsection ".entry.tramp.text", "ax"
-ENTRY(__sdei_asm_entry_trampoline)
+SYM_CODE_START(__sdei_asm_entry_trampoline)
mrs x4, ttbr1_el1
tbz x4, #USER_ASID_BIT, 1f
@@ -1214,15 +1083,9 @@
*/
1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
-#ifdef CONFIG_RANDOMIZE_BASE
- adr x4, tramp_vectors + PAGE_SIZE
- add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler
- ldr x4, [x4]
-#else
- ldr x4, =__sdei_asm_handler
-#endif
+ tramp_data_read_var x4, __sdei_asm_handler
br x4
-ENDPROC(__sdei_asm_entry_trampoline)
+SYM_CODE_END(__sdei_asm_entry_trampoline)
NOKPROBE(__sdei_asm_entry_trampoline)
/*
@@ -1232,23 +1095,17 @@
* x2: exit_mode
* x4: struct sdei_registered_event argument from registration time.
*/
-ENTRY(__sdei_asm_exit_trampoline)
+SYM_CODE_START(__sdei_asm_exit_trampoline)
ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
cbnz x4, 1f
tramp_unmap_kernel tmp=x4
1: sdei_handler_exit exit_mode=x2
-ENDPROC(__sdei_asm_exit_trampoline)
+SYM_CODE_END(__sdei_asm_exit_trampoline)
NOKPROBE(__sdei_asm_exit_trampoline)
.ltorg
.popsection // .entry.tramp.text
-#ifdef CONFIG_RANDOMIZE_BASE
-.pushsection ".rodata", "a"
-__sdei_asm_trampoline_next_handler:
- .quad __sdei_asm_handler
-.popsection // .rodata
-#endif /* CONFIG_RANDOMIZE_BASE */
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
/*
@@ -1264,7 +1121,7 @@
* follow SMC-CC. We save (or retrieve) all the registers as the handler may
* want them.
*/
-ENTRY(__sdei_asm_handler)
+SYM_CODE_START(__sdei_asm_handler)
stp x2, x3, [x1, #SDEI_EVENT_INTREGS + S_PC]
stp x4, x5, [x1, #SDEI_EVENT_INTREGS + 16 * 2]
stp x6, x7, [x1, #SDEI_EVENT_INTREGS + 16 * 3]
@@ -1284,13 +1141,16 @@
mov x19, x1
+#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+#endif
+
#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
* stack for this CPU.
*/
- ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
cbnz w4, 1f
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
b 2f
@@ -1300,6 +1160,15 @@
mov sp, x5
#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Use a separate shadow call stack for normal and critical events */
+ cbnz w4, 3f
+ adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_normal, tmp=x6
+ b 4f
+3: adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_critical, tmp=x6
+4:
+#endif
+
/*
* We may have interrupted userspace, or a guest, or exit-from or
* return-to either of these. We can't trust sp_el0, restore it.
@@ -1344,9 +1213,9 @@
alternative_else_nop_endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
- tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline
+ tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3
br x5
#endif
-ENDPROC(__sdei_asm_handler)
+SYM_CODE_END(__sdei_asm_handler)
NOKPROBE(__sdei_asm_handler)
#endif /* CONFIG_ARM_SDE_INTERFACE */
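
The reworked tramp_alias macro earlier in entry.S computes the fixmap alias
of a trampoline symbol as TRAMP_VALIAS plus the symbol's offset within
.entry.tramp.text. The same arithmetic in C, with all three addresses taken
as assumed inputs:

    static inline unsigned long tramp_alias(unsigned long tramp_valias,
                                            unsigned long sym,
                                            unsigned long tramp_text_start)
    {
            /* offset of the symbol within .entry.tramp.text ... */
            unsigned long offset = sym - tramp_text_start;

            /* ... relocated into the trampoline's fixmap alias */
            return tramp_valias + offset;
    }
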
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e62c9cb..a9bbfb8 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -12,6 +12,7 @@
#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/compat.h>
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/kernel.h>
@@ -31,9 +32,11 @@
#include <linux/swab.h>
#include <asm/esr.h>
+#include <asm/exception.h>
#include <asm/fpsimd.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
+#include <asm/neon.h>
#include <asm/processor.h>
#include <asm/simd.h>
#include <asm/sigcontext.h>
@@ -119,10 +122,20 @@
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
/* Default VL for tasks that don't set it explicitly: */
-static int sve_default_vl = -1;
+static int __sve_default_vl = -1;
+
+static int get_sve_default_vl(void)
+{
+ return READ_ONCE(__sve_default_vl);
+}
#ifdef CONFIG_ARM64_SVE
+static void set_sve_default_vl(int val)
+{
+ WRITE_ONCE(__sve_default_vl, val);
+}
+
/* Maximum supported vector length across all CPUs (initially poisoned) */
int __ro_after_init sve_max_vl = SVE_VL_MIN;
int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
@@ -301,7 +314,7 @@
* re-enter user with corrupt state.
* There's no way to recover, so kill it:
*/
- force_signal_inject(SIGKILL, SI_KERNEL, 0);
+ force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
return;
}
@@ -341,11 +354,10 @@
#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)
static int sve_proc_do_default_vl(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
- int vl = sve_default_vl;
+ int vl = get_sve_default_vl();
struct ctl_table tmp_table = {
.data = &vl,
.maxlen = sizeof(vl),
@@ -362,7 +374,7 @@
if (!sve_vl_valid(vl))
return -EINVAL;
- sve_default_vl = find_supported_vector_length(vl);
+ set_sve_default_vl(find_supported_vector_length(vl));
return 0;
}
@@ -666,7 +678,7 @@
vl = arg & PR_SVE_VL_LEN_MASK;
flags = arg & ~vl;
- if (!system_supports_sve())
+ if (!system_supports_sve() || is_compat_task())
return -EINVAL;
ret = sve_set_vector_length(current, vl, flags);
@@ -679,7 +691,7 @@
/* PR_SVE_GET_VL */
int sve_get_current_vl(void)
{
- if (!system_supports_sve())
+ if (!system_supports_sve() || is_compat_task())
return -EINVAL;
return sve_prctl_status(0);
@@ -869,7 +881,7 @@
* For the default VL, pick the maximum supported value <= 64.
* VL == 64 is guaranteed not to grow the signal frame.
*/
- sve_default_vl = find_supported_vector_length(64);
+ set_sve_default_vl(find_supported_vector_length(64));
bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map,
SVE_VQ_MAX);
@@ -890,7 +902,7 @@
pr_info("SVE: maximum available vector length %u bytes per vector\n",
sve_max_vl);
pr_info("SVE: default vector length %u bytes per vector\n",
- sve_default_vl);
+ get_sve_default_vl());
/* KVM decides whether to support mismatched systems. Just warn here: */
if (sve_max_virtualisable_vl < sve_max_vl)
@@ -918,15 +930,15 @@
* the SVE access trap will be disabled the next time this task
* reaches ret_to_user.
*
- * TIF_SVE should be clear on entry: otherwise, task_fpsimd_load()
+ * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()
* would have disabled the SVE access trap for userspace during
* ret_to_user, making an SVE access trap impossible in that case.
*/
-asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
+void do_sve_acc(unsigned int esr, struct pt_regs *regs)
{
/* Even if we chose not to use SVE, the hardware could still trap: */
if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
- force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}
@@ -949,7 +961,7 @@
/*
* Trapped FP/ASIMD access.
*/
-asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
{
/* TODO: implement lazy context saving/restoring */
WARN_ON(1);
@@ -958,7 +970,7 @@
/*
* Raise a SIGFPE for the current process.
*/
-asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
{
unsigned int si_code = FPE_FLTUNK;
@@ -1030,13 +1042,13 @@
* vector length configured: no kernel task can become a user
* task without an exec and hence a call to this function.
* By the time the first call to this function is made, all
- * early hardware probing is complete, so sve_default_vl
+ * early hardware probing is complete, so __sve_default_vl
* should be valid.
* If a bug causes this to go wrong, we make some noise and
* try to fudge thread.sve_vl to a safe value here.
*/
vl = current->thread.sve_vl_onexec ?
- current->thread.sve_vl_onexec : sve_default_vl;
+ current->thread.sve_vl_onexec : get_sve_default_vl();
if (WARN_ON(!sve_vl_valid(vl)))
vl = SVE_VL_MIN;
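
The get_sve_default_vl()/set_sve_default_vl() accessors introduced above
exist because the default VL is written from the sysctl handler while being
read at exec time with no common lock; READ_ONCE()/WRITE_ONCE() keep those
unsynchronised accesses single-copy atomic. A minimal model of the pattern,
with volatile accesses standing in for the kernel macros:

    static int __default_vl = -1;

    static int get_default_vl(void)
    {
            return *(volatile int *)&__default_vl;    /* READ_ONCE() */
    }

    static void set_default_vl(int vl)
    {
            *(volatile int *)&__default_vl = vl;      /* WRITE_ONCE() */
    }
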
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 06e56b4..86a5cf9 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -62,6 +62,20 @@
return ftrace_modify_code(pc, 0, new, false);
}
+static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
+{
+#ifdef CONFIG_ARM64_MODULE_PLTS
+ struct plt_entry *plt = mod->arch.ftrace_trampolines;
+
+ if (addr == FTRACE_ADDR)
+ return &plt[FTRACE_PLT_IDX];
+ if (addr == FTRACE_REGS_ADDR &&
+ IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ return &plt[FTRACE_REGS_PLT_IDX];
+#endif
+ return NULL;
+}
+
/*
* Turn on the call to ftrace_caller() in instrumented function
*/
@@ -72,9 +86,11 @@
long offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) {
-#ifdef CONFIG_ARM64_MODULE_PLTS
- struct plt_entry trampoline, *dst;
struct module *mod;
+ struct plt_entry *plt;
+
+ if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+ return -EINVAL;
/*
* On kernels that support module PLTs, the offset between the
@@ -93,49 +109,13 @@
if (WARN_ON(!mod))
return -EINVAL;
- /*
- * There is only one ftrace trampoline per module. For now,
- * this is not a problem since on arm64, all dynamic ftrace
- * invocations are routed via ftrace_caller(). This will need
- * to be revisited if support for multiple ftrace entry points
- * is added in the future, but for now, the pr_err() below
- * deals with a theoretical issue only.
- *
- * Note that PLTs are place relative, and plt_entries_equal()
- * checks whether they point to the same target. Here, we need
- * to check if the actual opcodes are in fact identical,
- * regardless of the offset in memory so use memcmp() instead.
- */
- dst = mod->arch.ftrace_trampoline;
- trampoline = get_plt_entry(addr, dst);
- if (memcmp(dst, &trampoline, sizeof(trampoline))) {
- if (plt_entry_is_initialized(dst)) {
- pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
- return -EINVAL;
- }
-
- /* point the trampoline to our ftrace entry point */
- module_disable_ro(mod);
- *dst = trampoline;
- module_enable_ro(mod, true);
-
- /*
- * Ensure updated trampoline is visible to instruction
- * fetch before we patch in the branch. Although the
- * architecture doesn't require an IPI in this case,
- * Neoverse-N1 erratum #1542419 does require one
- * if the TLB maintenance in module_enable_ro() is
- * skipped due to rodata_enabled. It doesn't seem worth
- * it to make it conditional given that this is
- * certainly not a fast-path.
- */
- flush_icache_range((unsigned long)&dst[0],
- (unsigned long)&dst[1]);
+ plt = get_ftrace_plt(mod, addr);
+ if (!plt) {
+ pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
+ return -EINVAL;
}
- addr = (unsigned long)dst;
-#else /* CONFIG_ARM64_MODULE_PLTS */
- return -EINVAL;
-#endif /* CONFIG_ARM64_MODULE_PLTS */
+
+ addr = (unsigned long)plt;
}
old = aarch64_insn_gen_nop();
@@ -144,6 +124,55 @@
return ftrace_modify_code(pc, old, new, true);
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ old = aarch64_insn_gen_branch_imm(pc, old_addr,
+ AARCH64_INSN_BRANCH_LINK);
+ new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
+
+ return ftrace_modify_code(pc, old, new, true);
+}
+
+/*
+ * The compiler has inserted two NOPs before the regular function prologue.
+ * All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live,
+ * and x9-x18 are free for our use.
+ *
+ * At runtime we want to be able to swing a single NOP <-> BL to enable or
+ * disable the ftrace call. The BL requires us to save the original LR value,
+ * so here we insert a <MOV X9, LR> over the first NOP so the instructions
+ * before the regular prologue are:
+ *
+ * | Compiled | Disabled | Enabled |
+ * +----------+------------+------------+
+ * | NOP | MOV X9, LR | MOV X9, LR |
+ * | NOP | NOP | BL <entry> |
+ *
+ * The LR value will be recovered by ftrace_regs_entry, and restored into LR
+ * before returning to the regular function prologue. When a function is not
+ * being traced, the MOV is not harmful given x9 is not live per the AAPCS.
+ *
+ * Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of
+ * the BL.
+ */
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+ unsigned long pc = rec->ip - AARCH64_INSN_SIZE;
+ u32 old, new;
+
+ old = aarch64_insn_gen_nop();
+ new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
+ AARCH64_INSN_REG_LR,
+ AARCH64_INSN_VARIANT_64BIT);
+ return ftrace_modify_code(pc, old, new, true);
+}
+#endif
+
/*
* Turn off the call to ftrace_caller() in instrumented function
*/
@@ -156,9 +185,11 @@
long offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) {
-#ifdef CONFIG_ARM64_MODULE_PLTS
u32 replaced;
+ if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+ return -EINVAL;
+
/*
* 'mod' is only set at module load time, but if we end up
* dealing with an out-of-range condition, we can assume it
@@ -189,9 +220,6 @@
return -EINVAL;
validate = false;
-#else /* CONFIG_ARM64_MODULE_PLTS */
- return -EINVAL;
-#endif /* CONFIG_ARM64_MODULE_PLTS */
} else {
old = aarch64_insn_gen_branch_imm(pc, addr,
AARCH64_INSN_BRANCH_LINK);
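
To tie the state table in the ftrace_init_nop() comment to the helpers this file uses, here is an illustrative fragment of the three patch states for one callsite. It assumes rec->ip points at the second NOP (as ftrace_process_locs() arranges, per the comment above) and addr is the ftrace entry point; both names stand in for the real arguments:

/* Illustration only: the three states of an instrumented callsite. */
unsigned long pc = rec->ip;
u32 nop = aarch64_insn_gen_nop();
u32 mov = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,	/* MOV X9, LR */
				    AARCH64_INSN_REG_LR,
				    AARCH64_INSN_VARIANT_64BIT);
u32 bl = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);

ftrace_modify_code(pc - AARCH64_INSN_SIZE, nop, mov, true); /* init: NOP -> MOV */
ftrace_modify_code(pc, nop, bl, true);			    /* enable: NOP -> BL */
ftrace_modify_code(pc, bl, nop, true);			    /* disable: BL -> NOP */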
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2f784d3..f9119ee 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -12,7 +12,9 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/pgtable.h>
+#include <asm/asm_pointer_auth.h>
#include <asm/assembler.h>
#include <asm/boot.h>
#include <asm/ptrace.h>
@@ -25,8 +27,8 @@
#include <asm/kvm_arm.h>
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
-#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@@ -34,14 +36,10 @@
#include "efi-header.S"
-#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET)
+#define __PHYS_OFFSET KERNEL_START
-#if (TEXT_OFFSET & 0xfff) != 0
-#error TEXT_OFFSET must be at least 4KB aligned
-#elif (PAGE_OFFSET & 0x1fffff) != 0
+#if (PAGE_OFFSET & 0x1fffff) != 0
#error PAGE_OFFSET must be at least 2MB aligned
-#elif TEXT_OFFSET > 0x1fffff
-#error TEXT_OFFSET must be less than 2MB
#endif
/*
@@ -53,7 +51,7 @@
* x0 = physical address to the FDT blob.
*
* This code is mostly position independent so you call this at
- * __pa(PAGE_OFFSET + TEXT_OFFSET).
+ * __pa(PAGE_OFFSET).
*
* Note that the callee-saved registers are used for storing variables
* that are useful before the MMU is enabled. The allocations are described
@@ -70,12 +68,12 @@
* its opcode forms the magic "MZ" signature required by UEFI.
*/
add x13, x18, #0x16
- b stext
+ b primary_entry
#else
- b stext // branch to kernel start, magic
+ b primary_entry // branch to kernel start, magic
.long 0 // reserved
#endif
- le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian
+ .quad 0 // Image load offset from start of RAM, little-endian
le64sym _kernel_size_le // Effective size of kernel image, little-endian
le64sym _kernel_flags_le // Informative flags, little-endian
.quad 0 // reserved
@@ -98,14 +96,13 @@
* primary lowlevel boot path:
*
* Register Scope Purpose
- * x21 stext() .. start_kernel() FDT pointer passed at boot in x0
- * x23 stext() .. start_kernel() physical misalignment/KASLR offset
- * x28 __create_page_tables() callee preserved temp register
- * x19/x20 __primary_switch() callee preserved temp registers
- * x24 __primary_switch() .. relocate_kernel()
- * current RELR displacement
+ * x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
+ * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
+ * x28 __create_page_tables() callee preserved temp register
+ * x19/x20 __primary_switch() callee preserved temp registers
+ * x24 __primary_switch() .. relocate_kernel() current RELR displacement
*/
-ENTRY(stext)
+SYM_CODE_START(primary_entry)
bl preserve_boot_args
bl el2_setup // Drop to EL1, w0=cpu_boot_mode
adrp x23, __PHYS_OFFSET
@@ -120,12 +117,12 @@
*/
bl __cpu_setup // initialise processor
b __primary_switch
-ENDPROC(stext)
+SYM_CODE_END(primary_entry)
/*
* Preserve the arguments passed by the bootloader in x0 .. x3
*/
-preserve_boot_args:
+SYM_CODE_START_LOCAL(preserve_boot_args)
mov x21, x0 // x21=FDT
adr_l x0, boot_args // record the contents of
@@ -137,7 +134,7 @@
mov x1, #0x20 // 4 x 8 bytes
b __inval_dcache_area // tail call
-ENDPROC(preserve_boot_args)
+SYM_CODE_END(preserve_boot_args)
/*
* Macro to create a table entry to the next page.
@@ -276,7 +273,7 @@
* - first few MB of the kernel linear mapping to jump to once the MMU has
* been enabled
*/
-__create_page_tables:
+SYM_FUNC_START_LOCAL(__create_page_tables)
mov x28, lr
/*
@@ -382,7 +379,7 @@
* Map the kernel image (starting with PHYS_OFFSET).
*/
adrp x0, init_pg_dir
- mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
+ mov_q x5, KIMAGE_VADDR // compile time __va(_text)
add x5, x5, x23 // add KASLR displacement
mov x4, PTRS_PER_PGD
adrp x6, _end // runtime __pa(_end)
@@ -410,20 +407,23 @@
bl __inval_dcache_area
ret x28
-ENDPROC(__create_page_tables)
- .ltorg
+SYM_FUNC_END(__create_page_tables)
/*
* The following fragment of code is executed with the MMU enabled.
*
* x0 = __PHYS_OFFSET
*/
-__primary_switched:
+SYM_FUNC_START_LOCAL(__primary_switched)
adrp x4, init_thread_union
add sp, x4, #THREAD_SIZE
adr_l x5, init_task
msr sp_el0, x5 // Save thread_info
+#ifdef CONFIG_ARM64_PTR_AUTH
+ __ptrauth_keys_init_cpu x5, x6, x7, x8
+#endif
+
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
@@ -431,6 +431,10 @@
stp xzr, x30, [sp, #-16]!
mov x29, sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l scs_sp, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer
ldr_l x4, kimage_vaddr // Save the offset between
@@ -463,7 +467,14 @@
mov x29, #0
mov x30, #0
b start_kernel
-ENDPROC(__primary_switched)
+SYM_FUNC_END(__primary_switched)
+
+ .pushsection ".rodata", "a"
+SYM_DATA_START(kimage_vaddr)
+ .quad _text
+SYM_DATA_END(kimage_vaddr)
+EXPORT_SYMBOL(kimage_vaddr)
+ .popsection
/*
* end early head section, begin head code that is also used for
@@ -471,10 +482,6 @@
*/
.section ".idmap.text","awx"
-ENTRY(kimage_vaddr)
- .quad _text - TEXT_OFFSET
-EXPORT_SYMBOL(kimage_vaddr)
-
/*
* If we're fortunate enough to boot at EL2, ensure that the world is
* sane before dropping to EL1.
@@ -482,7 +489,7 @@
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
* booted in EL1 or EL2 respectively.
*/
-ENTRY(el2_setup)
+SYM_FUNC_START(el2_setup)
msr SPsel, #1 // We want to use SP_EL{1,2}
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
@@ -606,7 +613,7 @@
isb
ret
-install_el2_stub:
+SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
/*
* When VHE is not in use, early init of EL2 and EL1 needs to be
* done here.
@@ -643,13 +650,13 @@
msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
eret
-ENDPROC(el2_setup)
+SYM_FUNC_END(el2_setup)
/*
* Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
* in w0. See arch/arm64/include/asm/virt.h for more info.
*/
-set_cpu_boot_mode_flag:
+SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
adr_l x1, __boot_cpu_mode
cmp w0, #BOOT_CPU_MODE_EL2
b.ne 1f
@@ -658,7 +665,7 @@
dmb sy
dc ivac, x1 // Invalidate potentially stale cache line
ret
-ENDPROC(set_cpu_boot_mode_flag)
+SYM_FUNC_END(set_cpu_boot_mode_flag)
/*
* These values are written with the MMU off, but read with the MMU on.
@@ -674,15 +681,17 @@
* This is not in .bss, because we set it sufficiently early that the boot-time
* zeroing of .bss would clobber it.
*/
-ENTRY(__boot_cpu_mode)
+SYM_DATA_START(__boot_cpu_mode)
.long BOOT_CPU_MODE_EL2
.long BOOT_CPU_MODE_EL1
+SYM_DATA_END(__boot_cpu_mode)
/*
* The booting CPU updates the failed status @__early_cpu_boot_status,
* with MMU turned off.
*/
-ENTRY(__early_cpu_boot_status)
+SYM_DATA_START(__early_cpu_boot_status)
.quad 0
+SYM_DATA_END(__early_cpu_boot_status)
.popsection
@@ -690,7 +699,7 @@
* This provides a "holding pen" for platforms to hold all secondary
* cores until we're ready for them to initialise.
*/
-ENTRY(secondary_holding_pen)
+SYM_FUNC_START(secondary_holding_pen)
bl el2_setup // Drop to EL1, w0=cpu_boot_mode
bl set_cpu_boot_mode_flag
mrs x0, mpidr_el1
@@ -702,19 +711,19 @@
b.eq secondary_startup
wfe
b pen
-ENDPROC(secondary_holding_pen)
+SYM_FUNC_END(secondary_holding_pen)
/*
* Secondary entry point that jumps straight into the kernel. Only to
* be used where CPUs are brought online dynamically by the kernel.
*/
-ENTRY(secondary_entry)
+SYM_FUNC_START(secondary_entry)
bl el2_setup // Drop to EL1
bl set_cpu_boot_mode_flag
b secondary_startup
-ENDPROC(secondary_entry)
+SYM_FUNC_END(secondary_entry)
-secondary_startup:
+SYM_FUNC_START_LOCAL(secondary_startup)
/*
* Common entry point for secondary CPUs.
*/
@@ -724,9 +733,9 @@
bl __enable_mmu
ldr x8, =__secondary_switched
br x8
-ENDPROC(secondary_startup)
+SYM_FUNC_END(secondary_startup)
-__secondary_switched:
+SYM_FUNC_START_LOCAL(__secondary_switched)
adr_l x5, vectors
msr vbar_el1, x5
isb
@@ -738,16 +747,22 @@
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+ scs_load x2, x3
mov x29, #0
mov x30, #0
- b secondary_start_kernel
-ENDPROC(__secondary_switched)
-__secondary_too_slow:
+#ifdef CONFIG_ARM64_PTR_AUTH
+ ptrauth_keys_init_cpu x2, x3, x4, x5
+#endif
+
+ b secondary_start_kernel
+SYM_FUNC_END(__secondary_switched)
+
+SYM_FUNC_START_LOCAL(__secondary_too_slow)
wfe
wfi
b __secondary_too_slow
-ENDPROC(__secondary_too_slow)
+SYM_FUNC_END(__secondary_too_slow)
/*
* The booting CPU updates the failed status @__early_cpu_boot_status,
@@ -779,7 +794,7 @@
* Checks if the selected granule size is supported by the CPU.
* If it isn't, park the CPU
*/
-ENTRY(__enable_mmu)
+SYM_FUNC_START(__enable_mmu)
mrs x2, ID_AA64MMFR0_EL1
ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
@@ -803,9 +818,9 @@
dsb nsh
isb
ret
-ENDPROC(__enable_mmu)
+SYM_FUNC_END(__enable_mmu)
-ENTRY(__cpu_secondary_check52bitva)
+SYM_FUNC_START(__cpu_secondary_check52bitva)
#ifdef CONFIG_ARM64_VA_BITS_52
ldr_l x0, vabits_actual
cmp x0, #52
@@ -823,9 +838,9 @@
#endif
2: ret
-ENDPROC(__cpu_secondary_check52bitva)
+SYM_FUNC_END(__cpu_secondary_check52bitva)
-__no_granule_support:
+SYM_FUNC_START_LOCAL(__no_granule_support)
/* Indicate that this CPU can't boot and is stuck in the kernel */
update_early_cpu_boot_status \
CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_NO_GRAN, x1, x2
@@ -833,10 +848,10 @@
wfe
wfi
b 1b
-ENDPROC(__no_granule_support)
+SYM_FUNC_END(__no_granule_support)
#ifdef CONFIG_RELOCATABLE
-__relocate_kernel:
+SYM_FUNC_START_LOCAL(__relocate_kernel)
/*
* Iterate over each entry in the relocation table, and apply the
* relocations in place.
@@ -938,10 +953,10 @@
#endif
ret
-ENDPROC(__relocate_kernel)
+SYM_FUNC_END(__relocate_kernel)
#endif
-__primary_switch:
+SYM_FUNC_START_LOCAL(__primary_switch)
#ifdef CONFIG_RANDOMIZE_BASE
mov x19, x0 // preserve new SCTLR_EL1 value
mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value
@@ -985,4 +1000,4 @@
ldr x8, =__primary_switched
adrp x0, __PHYS_OFFSET
br x8
-ENDPROC(__primary_switch)
+SYM_FUNC_END(__primary_switch)
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index 38bcd4d..8ccca66 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -65,7 +65,7 @@
* x5: physical address of a zero page that remains zero after resume
*/
.pushsection ".hibernate_exit.text", "ax"
-ENTRY(swsusp_arch_suspend_exit)
+SYM_CODE_START(swsusp_arch_suspend_exit)
/*
* We execute from ttbr0, change ttbr1 to our copied linear map tables
* with a break-before-make via the zero page
@@ -110,9 +110,7 @@
cbz x24, 3f /* Do we need to re-initialise EL2? */
hvc #0
3: ret
-
- .ltorg
-ENDPROC(swsusp_arch_suspend_exit)
+SYM_CODE_END(swsusp_arch_suspend_exit)
/*
* Restore the hyp stub.
@@ -121,15 +119,15 @@
*
* x24: The physical address of __hyp_stub_vectors
*/
-el1_sync:
+SYM_CODE_START_LOCAL(el1_sync)
msr vbar_el2, x24
eret
-ENDPROC(el1_sync)
+SYM_CODE_END(el1_sync)
.macro invalid_vector label
-\label:
+SYM_CODE_START_LOCAL(\label)
b \label
-ENDPROC(\label)
+SYM_CODE_END(\label)
.endm
invalid_vector el2_sync_invalid
@@ -143,7 +141,7 @@
/* el2 vectors - switch el2 here while we restore the memory image. */
.align 11
-ENTRY(hibernate_el2_vectors)
+SYM_CODE_START(hibernate_el2_vectors)
ventry el2_sync_invalid // Synchronous EL2t
ventry el2_irq_invalid // IRQ EL2t
ventry el2_fiq_invalid // FIQ EL2t
@@ -163,6 +161,6 @@
ventry el1_irq_invalid // IRQ 32-bit EL1
ventry el1_fiq_invalid // FIQ 32-bit EL1
ventry el1_error_invalid // Error 32-bit EL1
-END(hibernate_el2_vectors)
+SYM_CODE_END(hibernate_el2_vectors)
.popsection
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index a96b292..4200377 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -21,7 +21,6 @@
#include <linux/sched.h>
#include <linux/suspend.h>
#include <linux/utsname.h>
-#include <linux/version.h>
#include <asm/barrier.h>
#include <asm/cacheflush.h>
@@ -31,8 +30,8 @@
#include <asm/kexec.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
+#include <asm/mte.h>
#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sections.h>
#include <asm/smp.h>
@@ -166,14 +165,11 @@
sleep_cpu = -EINVAL;
return -EINVAL;
}
- if (!cpu_online(sleep_cpu)) {
- pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
- ret = cpu_up(sleep_cpu);
- if (ret) {
- pr_err("Failed to bring hibernate-CPU up!\n");
- sleep_cpu = -EINVAL;
- return ret;
- }
+
+ ret = bringup_hibernate_cpu(sleep_cpu);
+ if (ret) {
+ sleep_cpu = -EINVAL;
+ return ret;
}
resume_hdr = *hdr;
@@ -182,9 +178,57 @@
}
EXPORT_SYMBOL(arch_hibernation_header_restore);
+static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
+ unsigned long dst_addr,
+ pgprot_t pgprot)
+{
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
+ if (pgd_none(READ_ONCE(*pgdp))) {
+ pudp = (void *)get_safe_page(GFP_ATOMIC);
+ if (!pudp)
+ return -ENOMEM;
+ pgd_populate(&init_mm, pgdp, pudp);
+ }
+
+ p4dp = p4d_offset(pgdp, dst_addr);
+ if (p4d_none(READ_ONCE(*p4dp))) {
+ pudp = (void *)get_safe_page(GFP_ATOMIC);
+ if (!pudp)
+ return -ENOMEM;
+ p4d_populate(&init_mm, p4dp, pudp);
+ }
+
+ pudp = pud_offset(p4dp, dst_addr);
+ if (pud_none(READ_ONCE(*pudp))) {
+ pmdp = (void *)get_safe_page(GFP_ATOMIC);
+ if (!pmdp)
+ return -ENOMEM;
+ pud_populate(&init_mm, pudp, pmdp);
+ }
+
+ pmdp = pmd_offset(pudp, dst_addr);
+ if (pmd_none(READ_ONCE(*pmdp))) {
+ ptep = (void *)get_safe_page(GFP_ATOMIC);
+ if (!ptep)
+ return -ENOMEM;
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+
+ ptep = pte_offset_kernel(pmdp, dst_addr);
+ set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC));
+
+ return 0;
+}
+
/*
* Copies length bytes, starting at src_start into a new page,
- * perform cache maintentance, then maps it at the specified address low
+ * performs cache maintenance, then maps it at the specified low
* address as executable.
*
* This is used by hibernate to copy the code it needs to execute when
@@ -196,64 +240,26 @@
*/
static int create_safe_exec_page(void *src_start, size_t length,
unsigned long dst_addr,
- phys_addr_t *phys_dst_addr,
- void *(*allocator)(gfp_t mask),
- gfp_t mask)
+ phys_addr_t *phys_dst_addr)
{
- int rc = 0;
+ void *page = (void *)get_safe_page(GFP_ATOMIC);
pgd_t *trans_pgd;
- pgd_t *pgdp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep;
- unsigned long dst = (unsigned long)allocator(mask);
+ int rc;
- if (!dst) {
- rc = -ENOMEM;
- goto out;
- }
+ if (!page)
+ return -ENOMEM;
- memcpy((void *)dst, src_start, length);
- __flush_icache_range(dst, dst + length);
+ memcpy(page, src_start, length);
+ __flush_icache_range((unsigned long)page, (unsigned long)page + length);
- trans_pgd = allocator(mask);
- if (!trans_pgd) {
- rc = -ENOMEM;
- goto out;
- }
+ trans_pgd = (void *)get_safe_page(GFP_ATOMIC);
+ if (!trans_pgd)
+ return -ENOMEM;
- pgdp = pgd_offset_raw(trans_pgd, dst_addr);
- if (pgd_none(READ_ONCE(*pgdp))) {
- pudp = allocator(mask);
- if (!pudp) {
- rc = -ENOMEM;
- goto out;
- }
- pgd_populate(&init_mm, pgdp, pudp);
- }
-
- pudp = pud_offset(pgdp, dst_addr);
- if (pud_none(READ_ONCE(*pudp))) {
- pmdp = allocator(mask);
- if (!pmdp) {
- rc = -ENOMEM;
- goto out;
- }
- pud_populate(&init_mm, pudp, pmdp);
- }
-
- pmdp = pmd_offset(pudp, dst_addr);
- if (pmd_none(READ_ONCE(*pmdp))) {
- ptep = allocator(mask);
- if (!ptep) {
- rc = -ENOMEM;
- goto out;
- }
- pmd_populate_kernel(&init_mm, pmdp, ptep);
- }
-
- ptep = pte_offset_kernel(pmdp, dst_addr);
- set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
+ rc = trans_pgd_map_page(trans_pgd, page, dst_addr,
+ PAGE_KERNEL_EXEC);
+ if (rc)
+ return rc;
/*
* Load our new page tables. A strict BBM approach requires that we
@@ -269,17 +275,127 @@
*/
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
- write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
+ write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1);
isb();
- *phys_dst_addr = virt_to_phys((void *)dst);
+ *phys_dst_addr = virt_to_phys(page);
-out:
- return rc;
+ return 0;
}
#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start))
+#ifdef CONFIG_ARM64_MTE
+
+static DEFINE_XARRAY(mte_pages);
+
+static int save_tags(struct page *page, unsigned long pfn)
+{
+ void *tag_storage, *ret;
+
+ tag_storage = mte_allocate_tag_storage();
+ if (!tag_storage)
+ return -ENOMEM;
+
+ mte_save_page_tags(page_address(page), tag_storage);
+
+ ret = xa_store(&mte_pages, pfn, tag_storage, GFP_KERNEL);
+ if (WARN(xa_is_err(ret), "Failed to store MTE tags")) {
+ mte_free_tag_storage(tag_storage);
+ return xa_err(ret);
+ } else if (WARN(ret, "swsusp: %s: Duplicate entry", __func__)) {
+ mte_free_tag_storage(ret);
+ }
+
+ return 0;
+}
+
+static void swsusp_mte_free_storage(void)
+{
+ XA_STATE(xa_state, &mte_pages, 0);
+ void *tags;
+
+ xa_lock(&mte_pages);
+ xas_for_each(&xa_state, tags, ULONG_MAX) {
+ mte_free_tag_storage(tags);
+ }
+ xa_unlock(&mte_pages);
+
+ xa_destroy(&mte_pages);
+}
+
+static int swsusp_mte_save_tags(void)
+{
+ struct zone *zone;
+ unsigned long pfn, max_zone_pfn;
+ int ret = 0;
+ int n = 0;
+
+ if (!system_supports_mte())
+ return 0;
+
+ for_each_populated_zone(zone) {
+ max_zone_pfn = zone_end_pfn(zone);
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
+ struct page *page = pfn_to_online_page(pfn);
+
+ if (!page)
+ continue;
+
+ if (!test_bit(PG_mte_tagged, &page->flags))
+ continue;
+
+ ret = save_tags(page, pfn);
+ if (ret) {
+ swsusp_mte_free_storage();
+ goto out;
+ }
+
+ n++;
+ }
+ }
+ pr_info("Saved %d MTE pages\n", n);
+
+out:
+ return ret;
+}
+
+static void swsusp_mte_restore_tags(void)
+{
+ XA_STATE(xa_state, &mte_pages, 0);
+ int n = 0;
+ void *tags;
+
+ xa_lock(&mte_pages);
+ xas_for_each(&xa_state, tags, ULONG_MAX) {
+ unsigned long pfn = xa_state.xa_index;
+ struct page *page = pfn_to_online_page(pfn);
+
+ mte_restore_page_tags(page_address(page), tags);
+
+ mte_free_tag_storage(tags);
+ n++;
+ }
+ xa_unlock(&mte_pages);
+
+ pr_info("Restored %d MTE pages\n", n);
+
+ xa_destroy(&mte_pages);
+}
+
+#else /* CONFIG_ARM64_MTE */
+
+static int swsusp_mte_save_tags(void)
+{
+ return 0;
+}
+
+static void swsusp_mte_restore_tags(void)
+{
+}
+
+#endif /* CONFIG_ARM64_MTE */
+
int swsusp_arch_suspend(void)
{
int ret = 0;
@@ -297,6 +413,10 @@
/* make the crash dump kernel image visible/saveable */
crash_prepare_suspend();
+ ret = swsusp_mte_save_tags();
+ if (ret)
+ return ret;
+
sleep_cpu = smp_processor_id();
ret = swsusp_save();
} else {
@@ -310,6 +430,8 @@
dcache_clean_range(__hyp_text_start, __hyp_text_end);
}
+ swsusp_mte_restore_tags();
+
/* make the crash dump kernel image protected again */
crash_post_resume();
@@ -327,11 +449,7 @@
* mitigation off behind our back, let's set the state
* to what we expect it to be.
*/
- switch (arm64_get_ssbd_state()) {
- case ARM64_SSBD_FORCE_ENABLE:
- case ARM64_SSBD_KERNEL:
- arm64_set_ssbd_mitigation(true);
- }
+ spectre_v4_enable_mitigation(NULL);
}
local_daif_restore(flags);
@@ -422,7 +540,7 @@
return 0;
}
-static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
+static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start,
unsigned long end)
{
pud_t *dst_pudp;
@@ -430,15 +548,15 @@
unsigned long next;
unsigned long addr = start;
- if (pgd_none(READ_ONCE(*dst_pgdp))) {
+ if (p4d_none(READ_ONCE(*dst_p4dp))) {
dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!dst_pudp)
return -ENOMEM;
- pgd_populate(&init_mm, dst_pgdp, dst_pudp);
+ p4d_populate(&init_mm, dst_p4dp, dst_pudp);
}
- dst_pudp = pud_offset(dst_pgdp, start);
+ dst_pudp = pud_offset(dst_p4dp, start);
- src_pudp = pud_offset(src_pgdp, start);
+ src_pudp = pud_offset(src_p4dp, start);
do {
pud_t pud = READ_ONCE(*src_pudp);
@@ -450,13 +568,34 @@
return -ENOMEM;
} else {
set_pud(dst_pudp,
- __pud(pud_val(pud) & ~PMD_SECT_RDONLY));
+ __pud(pud_val(pud) & ~PUD_SECT_RDONLY));
}
} while (dst_pudp++, src_pudp++, addr = next, addr != end);
return 0;
}
+static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
+ unsigned long end)
+{
+ p4d_t *dst_p4dp;
+ p4d_t *src_p4dp;
+ unsigned long next;
+ unsigned long addr = start;
+
+ dst_p4dp = p4d_offset(dst_pgdp, start);
+ src_p4dp = p4d_offset(src_pgdp, start);
+ do {
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(READ_ONCE(*src_p4dp)))
+ continue;
+ if (copy_pud(dst_p4dp, src_p4dp, addr, next))
+ return -ENOMEM;
+ } while (dst_p4dp++, src_p4dp++, addr = next, addr != end);
+
+ return 0;
+}
+
static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
unsigned long end)
{
@@ -464,18 +603,36 @@
unsigned long addr = start;
pgd_t *src_pgdp = pgd_offset_k(start);
- dst_pgdp = pgd_offset_raw(dst_pgdp, start);
+ dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
do {
next = pgd_addr_end(addr, end);
if (pgd_none(READ_ONCE(*src_pgdp)))
continue;
- if (copy_pud(dst_pgdp, src_pgdp, addr, next))
+ if (copy_p4d(dst_pgdp, src_pgdp, addr, next))
return -ENOMEM;
} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
return 0;
}
+static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
+ unsigned long end)
+{
+ int rc;
+ pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
+
+ if (!trans_pgd) {
+ pr_err("Failed to allocate memory for temporary page tables.\n");
+ return -ENOMEM;
+ }
+
+ rc = copy_page_tables(trans_pgd, start, end);
+ if (!rc)
+ *dst_pgdp = trans_pgd;
+
+ return rc;
+}
+
/*
* Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
*
@@ -484,7 +641,7 @@
*/
int swsusp_arch_resume(void)
{
- int rc = 0;
+ int rc;
void *zero_page;
size_t exit_size;
pgd_t *tmp_pg_dir;
@@ -497,15 +654,9 @@
* Create a second copy of just the linear map, and use this when
* restoring.
*/
- tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
- if (!tmp_pg_dir) {
- pr_err("Failed to allocate memory for temporary page tables.\n");
- rc = -ENOMEM;
- goto out;
- }
- rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, PAGE_END);
+ rc = trans_pgd_create_copy(&tmp_pg_dir, PAGE_OFFSET, PAGE_END);
if (rc)
- goto out;
+ return rc;
/*
* We need a zero page that is zero before & after resume in order to
@@ -514,8 +665,7 @@
zero_page = (void *)get_safe_page(GFP_ATOMIC);
if (!zero_page) {
pr_err("Failed to allocate zero page.\n");
- rc = -ENOMEM;
- goto out;
+ return -ENOMEM;
}
/*
@@ -530,11 +680,10 @@
*/
rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
(unsigned long)hibernate_exit,
- &phys_hibernate_exit,
- (void *)get_safe_page, GFP_ATOMIC);
+ &phys_hibernate_exit);
if (rc) {
pr_err("Failed to create safe executable page for hibernate_exit code.\n");
- goto out;
+ return rc;
}
/*
@@ -561,8 +710,7 @@
resume_hdr.reenter_kernel, restore_pblist,
resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
-out:
- return rc;
+ return 0;
}
int hibernate_resume_nonboot_cpu_disable(void)
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index b4a1607..712e97c 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -51,7 +51,7 @@
case TYPE_DATA:
return get_num_wrps();
default:
- pr_warning("unknown slot type: %d\n", type);
+ pr_warn("unknown slot type: %d\n", type);
return 0;
}
}
@@ -112,7 +112,7 @@
GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
default:
- pr_warning("attempt to read from unknown breakpoint register %d\n", n);
+ pr_warn("attempt to read from unknown breakpoint register %d\n", n);
}
return val;
@@ -127,7 +127,7 @@
GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
default:
- pr_warning("attempt to write to unknown breakpoint register %d\n", n);
+ pr_warn("attempt to write to unknown breakpoint register %d\n", n);
}
isb();
}
@@ -145,7 +145,7 @@
case AARCH64_BREAKPOINT_EL1:
return DBG_ACTIVE_EL1;
default:
- pr_warning("invalid breakpoint privilege level %d\n", privilege);
+ pr_warn("invalid breakpoint privilege level %d\n", privilege);
return -EINVAL;
}
}
@@ -257,7 +257,7 @@
* level.
*/
enable_debug_monitors(dbg_el);
- /* Fall through */
+ fallthrough;
case HW_BREAKPOINT_RESTORE:
/* Setup the address register. */
write_wb_reg(val_reg, i, info->address);
@@ -541,13 +541,13 @@
if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2)
break;
- /* Fallthrough */
+ fallthrough;
case 3:
/* Allow single byte watchpoint. */
if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1)
break;
- /* Fallthrough */
+ fallthrough;
default:
return -EINVAL;
}
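
The `/* Fall through */` comments above become the fallthrough pseudo-keyword, which the compiler can verify under -Wimplicit-fallthrough. A self-contained illustration, with fallthrough defined as in include/linux/compiler_attributes.h and a hypothetical helper standing in for the watchpoint length checks:

#if __has_attribute(__fallthrough__)
# define fallthrough	__attribute__((__fallthrough__))
#else
# define fallthrough	do {} while (0)	/* fallthrough */
#endif

static int watchpoint_bytes(int len)
{
	int bytes = 0;

	switch (len) {
	case 2:
		bytes++;
		fallthrough;	/* deliberate: checked by the compiler */
	case 1:
		bytes++;
		break;
	default:
		bytes = -1;
	}
	return bytes;
}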
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 73d4607..160f588 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -21,7 +21,7 @@
.align 11
-ENTRY(__hyp_stub_vectors)
+SYM_CODE_START(__hyp_stub_vectors)
ventry el2_sync_invalid // Synchronous EL2t
ventry el2_irq_invalid // IRQ EL2t
ventry el2_fiq_invalid // FIQ EL2t
@@ -41,11 +41,11 @@
ventry el1_irq_invalid // IRQ 32-bit EL1
ventry el1_fiq_invalid // FIQ 32-bit EL1
ventry el1_error_invalid // Error 32-bit EL1
-ENDPROC(__hyp_stub_vectors)
+SYM_CODE_END(__hyp_stub_vectors)
.align 11
-el1_sync:
+SYM_CODE_START_LOCAL(el1_sync)
cmp x0, #HVC_SET_VECTORS
b.ne 2f
msr vbar_el2, x1
@@ -63,17 +63,17 @@
beq 9f // Nothing to reset!
/* Someone called kvm_call_hyp() against the hyp-stub... */
- ldr x0, =HVC_STUB_ERR
+ mov_q x0, HVC_STUB_ERR
eret
9: mov x0, xzr
eret
-ENDPROC(el1_sync)
+SYM_CODE_END(el1_sync)
.macro invalid_vector label
-\label:
+SYM_CODE_START_LOCAL(\label)
b \label
-ENDPROC(\label)
+SYM_CODE_END(\label)
.endm
invalid_vector el2_sync_invalid
@@ -106,15 +106,15 @@
* initialisation entry point.
*/
-ENTRY(__hyp_set_vectors)
+SYM_FUNC_START(__hyp_set_vectors)
mov x1, x0
mov x0, #HVC_SET_VECTORS
hvc #0
ret
-ENDPROC(__hyp_set_vectors)
+SYM_FUNC_END(__hyp_set_vectors)
-ENTRY(__hyp_reset_vectors)
+SYM_FUNC_START(__hyp_reset_vectors)
mov x0, #HVC_RESET_VECTORS
hvc #0
ret
-ENDPROC(__hyp_reset_vectors)
+SYM_FUNC_END(__hyp_reset_vectors)
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 25a2a9b..c615b28 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -12,7 +12,9 @@
#ifdef CONFIG_EFI
-__efistub_stext_offset = stext - _text;
+__efistub_kernel_size = _edata - _text;
+__efistub_primary_entry_offset = primary_entry - _text;
+
/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
@@ -33,7 +35,7 @@
__efistub_strcmp = __pi_strcmp;
__efistub_strncmp = __pi_strncmp;
__efistub_strrchr = __pi_strrchr;
-__efistub___flush_dcache_area = __pi___flush_dcache_area;
+__efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc;
#ifdef CONFIG_KASAN
__efistub___memcpy = __pi_memcpy;
@@ -45,7 +47,62 @@
__efistub__end = _end;
__efistub__edata = _edata;
__efistub_screen_info = screen_info;
+__efistub__ctype = _ctype;
#endif
+#ifdef CONFIG_KVM
+
+/*
+ * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_, to
+ * separate it from the kernel proper. The following symbols are legally
+ * accessed by it, therefore provide aliases to make them linkable.
+ * Do not include symbols which may not be safely accessed under hypervisor
+ * memory mappings.
+ */
+
+/* Alternative callbacks for init-time patching of nVHE hyp code. */
+KVM_NVHE_ALIAS(kvm_patch_vector_branch);
+KVM_NVHE_ALIAS(kvm_update_va_mask);
+
+/* Global kernel state accessed by nVHE hyp code. */
+KVM_NVHE_ALIAS(kvm_vgic_global_state);
+
+/* Kernel constant needed to compute idmap addresses. */
+KVM_NVHE_ALIAS(kimage_voffset);
+
+/* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */
+KVM_NVHE_ALIAS(__hyp_panic_string);
+KVM_NVHE_ALIAS(panic);
+
+/* Vectors installed by hyp-init on reset HVC. */
+KVM_NVHE_ALIAS(__hyp_stub_vectors);
+
+/* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */
+KVM_NVHE_ALIAS(idmap_t0sz);
+
+/* Kernel symbol used by icache_is_vpipt(). */
+KVM_NVHE_ALIAS(__icache_flags);
+
+/* Kernel symbols needed for cpus_have_final/const_caps checks. */
+KVM_NVHE_ALIAS(arm64_const_caps_ready);
+KVM_NVHE_ALIAS(cpu_hwcap_keys);
+
+/* Static keys which are set if a vGIC trap should be handled in hyp. */
+KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
+KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
+
+/* Static key checked in pmr_sync(). */
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+KVM_NVHE_ALIAS(gic_pmr_sync);
+/* Static key checked in GIC_PRIO_IRQOFF. */
+KVM_NVHE_ALIAS(gic_nonsecure_priorities);
+#endif
+
+/* EL2 exception handling */
+KVM_NVHE_ALIAS(__start___kvm_ex_table);
+KVM_NVHE_ALIAS(__stop___kvm_ex_table);
+
+#endif /* CONFIG_KVM */
+
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index c7d38c6..7bc3ba8 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -62,7 +62,6 @@
*/
#define HEAD_SYMBOLS \
DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \
- DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \
DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS);
#endif /* __ARM64_KERNEL_IMAGE_H */
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 53bcf53..6c0de2f 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -51,21 +51,27 @@
return aarch64_insn_encoding_class[(insn >> 25) & 0xf];
}
-/* NOP is an alias of HINT */
-bool __kprobes aarch64_insn_is_nop(u32 insn)
+bool __kprobes aarch64_insn_is_steppable_hint(u32 insn)
{
if (!aarch64_insn_is_hint(insn))
return false;
switch (insn & 0xFE0) {
- case AARCH64_INSN_HINT_YIELD:
- case AARCH64_INSN_HINT_WFE:
- case AARCH64_INSN_HINT_WFI:
- case AARCH64_INSN_HINT_SEV:
- case AARCH64_INSN_HINT_SEVL:
- return false;
- default:
+ case AARCH64_INSN_HINT_XPACLRI:
+ case AARCH64_INSN_HINT_PACIA_1716:
+ case AARCH64_INSN_HINT_PACIB_1716:
+ case AARCH64_INSN_HINT_PACIAZ:
+ case AARCH64_INSN_HINT_PACIASP:
+ case AARCH64_INSN_HINT_PACIBZ:
+ case AARCH64_INSN_HINT_PACIBSP:
+ case AARCH64_INSN_HINT_BTI:
+ case AARCH64_INSN_HINT_BTIC:
+ case AARCH64_INSN_HINT_BTIJ:
+ case AARCH64_INSN_HINT_BTIJC:
+ case AARCH64_INSN_HINT_NOP:
return true;
+ default:
+ return false;
}
}
@@ -123,7 +129,7 @@
int ret;
__le32 val;
- ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE);
+ ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE);
if (!ret)
*insnp = le32_to_cpu(val);
@@ -139,7 +145,7 @@
raw_spin_lock_irqsave(&patch_lock, flags);
waddr = patch_map(addr, FIX_TEXT_POKE0);
- ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE);
+ ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE);
patch_unmap(FIX_TEXT_POKE0);
raw_spin_unlock_irqrestore(&patch_lock, flags);
@@ -164,7 +170,7 @@
bool __kprobes aarch64_insn_is_branch(u32 insn)
{
- /* b, bl, cb*, tb*, b.cond, br, blr */
+ /* b, bl, cb*, tb*, ret*, b.cond, br*, blr* */
return aarch64_insn_is_b(insn) ||
aarch64_insn_is_bl(insn) ||
@@ -173,8 +179,11 @@
aarch64_insn_is_tbz(insn) ||
aarch64_insn_is_tbnz(insn) ||
aarch64_insn_is_ret(insn) ||
+ aarch64_insn_is_ret_auth(insn) ||
aarch64_insn_is_br(insn) ||
+ aarch64_insn_is_br_auth(insn) ||
aarch64_insn_is_blr(insn) ||
+ aarch64_insn_is_blr_auth(insn) ||
aarch64_insn_is_bcond(insn);
}
@@ -574,7 +583,7 @@
offset >> 2);
}
-u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op)
+u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op)
{
return aarch64_insn_get_hint_value() | op;
}
@@ -1282,6 +1291,19 @@
return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
}
+/*
+ * MOV (register) is architecturally an alias of ORR (shifted register) where
+ * MOV <*d>, <*m> is equivalent to ORR <*d>, <*ZR>, <*m>
+ */
+u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_variant variant)
+{
+ return aarch64_insn_gen_logical_shifted_reg(dst, AARCH64_INSN_REG_ZR,
+ src, 0, variant,
+ AARCH64_INSN_LOGIC_ORR);
+}
+
u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
enum aarch64_insn_register reg,
enum aarch64_insn_adr_type type)
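
As a concrete check of the alias described above: the MOV X9, LR that ftrace patches in should come out as ORR X9, XZR, X30. A sanity-check sketch, with the expected constant hand-assembled (sf=1, Rm=30, Rn=31/XZR, Rd=9):

u32 insn = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
				     AARCH64_INSN_REG_LR,
				     AARCH64_INSN_VARIANT_64BIT);

WARN_ON(insn != 0xaa1e03e9);	/* encoding of "mov x9, x30" */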
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 04a327c..60456a6 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -10,10 +10,10 @@
* Copyright (C) 2012 ARM Ltd.
*/
-#include <linux/kernel_stat.h>
#include <linux/irq.h>
#include <linux/memory.h>
#include <linux/smp.h>
+#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/irqchip.h>
#include <linux/kprobes.h>
@@ -22,20 +22,11 @@
#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
-unsigned long irq_err_count;
-
/* Only access this in an NMI enter/exit */
DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts);
DEFINE_PER_CPU(unsigned long *, irq_stack_ptr);
-int arch_show_interrupts(struct seq_file *p, int prec)
-{
- show_ipi_list(p, prec);
- seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
- return 0;
-}
-
#ifdef CONFIG_VMAP_STACK
static void init_irq_stacks(void)
{
@@ -76,18 +67,3 @@
local_daif_restore(DAIF_PROCCTX_NOIRQ);
}
}
-
-/*
- * Stubs to make nmi_enter/exit() code callable from ASM
- */
-asmlinkage void notrace asm_nmi_enter(void)
-{
- nmi_enter();
-}
-NOKPROBE_SYMBOL(asm_nmi_enter);
-
-asmlinkage void notrace asm_nmi_exit(void)
-{
- nmi_exit();
-}
-NOKPROBE_SYMBOL(asm_nmi_exit);
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 416f537..b181e05 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -10,15 +10,24 @@
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/types.h>
+#include <linux/pgtable.h>
+#include <linux/random.h>
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/sections.h>
+enum kaslr_status {
+ KASLR_ENABLED,
+ KASLR_DISABLED_CMDLINE,
+ KASLR_DISABLED_NO_SEED,
+ KASLR_DISABLED_FDT_REMAP,
+};
+
+static enum kaslr_status __initdata kaslr_status;
u64 __ro_after_init module_alloc_base;
u16 __initdata memstart_offset_seed;
@@ -75,6 +84,7 @@
void *fdt;
u64 seed, offset, mask, module_range;
const u8 *cmdline, *str;
+ unsigned long raw;
int size;
/*
@@ -91,15 +101,15 @@
*/
early_fixmap_init();
fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
- if (!fdt)
+ if (!fdt) {
+ kaslr_status = KASLR_DISABLED_FDT_REMAP;
return 0;
+ }
/*
* Retrieve (and wipe) the seed from the FDT
*/
seed = get_kaslr_seed(fdt);
- if (!seed)
- return 0;
/*
* Check if 'nokaslr' appears on the command line, and
@@ -107,8 +117,23 @@
*/
cmdline = kaslr_get_cmdline(fdt);
str = strstr(cmdline, "nokaslr");
- if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
+ if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) {
+ kaslr_status = KASLR_DISABLED_CMDLINE;
return 0;
+ }
+
+ /*
+ * Mix in any entropy obtainable architecturally if enabled
+ * and supported.
+ */
+
+ if (arch_get_random_seed_long_early(&raw))
+ seed ^= raw;
+
+ if (!seed) {
+ kaslr_status = KASLR_DISABLED_NO_SEED;
+ return 0;
+ }
/*
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
@@ -170,3 +195,24 @@
return offset;
}
+
+static int __init kaslr_init(void)
+{
+ switch (kaslr_status) {
+ case KASLR_ENABLED:
+ pr_info("KASLR enabled\n");
+ break;
+ case KASLR_DISABLED_CMDLINE:
+ pr_info("KASLR disabled on command line\n");
+ break;
+ case KASLR_DISABLED_NO_SEED:
+ pr_warn("KASLR disabled due to lack of seed\n");
+ break;
+ case KASLR_DISABLED_FDT_REMAP:
+ pr_warn("KASLR disabled due to FDT remapping failure\n");
+ break;
+ }
+
+ return 0;
+}
+core_initcall(kaslr_init)
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
index 29a9428..9ec3469 100644
--- a/arch/arm64/kernel/kexec_image.c
+++ b/arch/arm64/kernel/kexec_image.c
@@ -43,14 +43,10 @@
u64 flags, value;
bool be_image, be_kernel;
struct kexec_buf kbuf;
- unsigned long text_offset;
+ unsigned long text_offset, kernel_segment_number;
struct kexec_segment *kernel_segment;
int ret;
- /* We don't support crash kernels yet. */
- if (image->type == KEXEC_TYPE_CRASH)
- return ERR_PTR(-EOPNOTSUPP);
-
/*
* We require a kernel with an unambiguous Image header. Per
* Documentation/arm64/booting.rst, this is the case when image_size
@@ -92,11 +88,37 @@
/* Adjust kernel segment with TEXT_OFFSET */
kbuf.memsz += text_offset;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- return ERR_PTR(ret);
+ kernel_segment_number = image->nr_segments;
- kernel_segment = &image->segment[image->nr_segments - 1];
+ /*
+ * The location of the kernel segment may make it impossible to satisfy
+ * the other segment requirements, so we try repeatedly to find a
+ * location that will work.
+ */
+ while ((ret = kexec_add_buffer(&kbuf)) == 0) {
+ /* Try to load additional data */
+ kernel_segment = &image->segment[kernel_segment_number];
+ ret = load_other_segments(image, kernel_segment->mem,
+ kernel_segment->memsz, initrd,
+ initrd_len, cmdline);
+ if (!ret)
+ break;
+
+ /*
+ * We couldn't find space for the other segments; erase the
+ * kernel segment and try the next available hole.
+ */
+ image->nr_segments -= 1;
+ kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ }
+
+ if (ret) {
+ pr_err("Could not find any suitable kernel location!");
+ return ERR_PTR(ret);
+ }
+
+ kernel_segment = &image->segment[kernel_segment_number];
kernel_segment->mem += text_offset;
kernel_segment->memsz -= text_offset;
image->start = kernel_segment->mem;
@@ -105,12 +127,7 @@
kernel_segment->mem, kbuf.bufsz,
kernel_segment->memsz);
- /* Load additional data */
- ret = load_other_segments(image,
- kernel_segment->mem, kernel_segment->memsz,
- initrd, initrd_len, cmdline);
-
- return ERR_PTR(ret);
+ return NULL;
}
#ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index cc049ff..a0b144c 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -160,18 +160,6 @@
kexec_image_info(kimage);
- pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__,
- kimage->control_code_page);
- pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__,
- &reboot_code_buffer_phys);
- pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__,
- reboot_code_buffer);
- pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__,
- arm64_relocate_new_kernel);
- pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
- __func__, __LINE__, arm64_relocate_new_kernel_size,
- arm64_relocate_new_kernel_size);
-
/*
* Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
* after the kernel is shut down.
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index d2a62dd..0cde47a 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -17,12 +17,15 @@
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/random.h>
+#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
#include <asm/byteorder.h>
/* relevant device tree properties */
+#define FDT_PROP_KEXEC_ELFHDR "linux,elfcorehdr"
+#define FDT_PROP_MEM_RANGE "linux,usable-memory-range"
#define FDT_PROP_INITRD_START "linux,initrd-start"
#define FDT_PROP_INITRD_END "linux,initrd-end"
#define FDT_PROP_BOOTARGS "bootargs"
@@ -40,6 +43,10 @@
vfree(image->arch.dtb);
image->arch.dtb = NULL;
+ vfree(image->arch.elf_headers);
+ image->arch.elf_headers = NULL;
+ image->arch.elf_headers_sz = 0;
+
return kexec_image_post_load_cleanup_default(image);
}
@@ -55,6 +62,31 @@
off = ret;
+ ret = fdt_delprop(dtb, off, FDT_PROP_KEXEC_ELFHDR);
+ if (ret && ret != -FDT_ERR_NOTFOUND)
+ goto out;
+ ret = fdt_delprop(dtb, off, FDT_PROP_MEM_RANGE);
+ if (ret && ret != -FDT_ERR_NOTFOUND)
+ goto out;
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ /* add linux,elfcorehdr */
+ ret = fdt_appendprop_addrrange(dtb, 0, off,
+ FDT_PROP_KEXEC_ELFHDR,
+ image->arch.elf_headers_mem,
+ image->arch.elf_headers_sz);
+ if (ret)
+ return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL);
+
+ /* add linux,usable-memory-range */
+ ret = fdt_appendprop_addrrange(dtb, 0, off,
+ FDT_PROP_MEM_RANGE,
+ crashk_res.start,
+ crashk_res.end - crashk_res.start + 1);
+ if (ret)
+ return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL);
+ }
+
/* add bootargs */
if (cmdline) {
ret = fdt_setprop_string(dtb, off, FDT_PROP_BOOTARGS, cmdline);
@@ -89,7 +121,7 @@
/* add kaslr-seed */
ret = fdt_delprop(dtb, off, FDT_PROP_KASLR_SEED);
- if (ret == -FDT_ERR_NOTFOUND)
+ if (ret == -FDT_ERR_NOTFOUND)
ret = 0;
else if (ret)
goto out;
@@ -106,12 +138,12 @@
/* add rng-seed */
if (rng_is_initialized()) {
- u8 rng_seed[RNG_SEED_SIZE];
- get_random_bytes(rng_seed, RNG_SEED_SIZE);
- ret = fdt_setprop(dtb, off, FDT_PROP_RNG_SEED, rng_seed,
- RNG_SEED_SIZE);
+ void *rng_seed;
+ ret = fdt_setprop_placeholder(dtb, off, FDT_PROP_RNG_SEED,
+ RNG_SEED_SIZE, &rng_seed);
if (ret)
goto out;
+ get_random_bytes(rng_seed, RNG_SEED_SIZE);
} else {
pr_notice("RNG is not initialised: omitting \"%s\" property\n",
FDT_PROP_RNG_SEED);
@@ -125,8 +157,8 @@
}
/*
- * More space needed so that we can add initrd, bootargs, kaslr-seed, and
- * rng-seed.
+ * More space needed so that we can add initrd, bootargs, kaslr-seed,
+ * rng-seed, usable-memory-range and elfcorehdr.
*/
#define DTB_EXTRA_SPACE 0x1000
@@ -176,6 +208,45 @@
}
}
+static int prepare_elf_headers(void **addr, unsigned long *sz)
+{
+ struct crash_mem *cmem;
+ unsigned int nr_ranges;
+ int ret;
+ u64 i;
+ phys_addr_t start, end;
+
+ nr_ranges = 1; /* for exclusion of crashkernel region */
+ for_each_mem_range(i, &start, &end)
+ nr_ranges++;
+
+ cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
+ if (!cmem)
+ return -ENOMEM;
+
+ cmem->max_nr_ranges = nr_ranges;
+ cmem->nr_ranges = 0;
+ for_each_mem_range(i, &start, &end) {
+ cmem->ranges[cmem->nr_ranges].start = start;
+ cmem->ranges[cmem->nr_ranges].end = end - 1;
+ cmem->nr_ranges++;
+ }
+
+ /* Exclude crashkernel region */
+ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+
+ if (!ret)
+ ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
+
+ kfree(cmem);
+ return ret;
+}
+
+/*
+ * Tries to add the initrd and DTB to the image. If it is not possible to find
+ * valid locations, this function will undo changes to the image and return
+ * non-zero.
+ */
int load_other_segments(struct kimage *image,
unsigned long kernel_load_addr,
unsigned long kernel_size,
@@ -183,14 +254,44 @@
char *cmdline)
{
struct kexec_buf kbuf;
- void *dtb = NULL;
- unsigned long initrd_load_addr = 0, dtb_len;
+ void *headers, *dtb = NULL;
+ unsigned long headers_sz, initrd_load_addr = 0, dtb_len,
+ orig_segments = image->nr_segments;
int ret = 0;
kbuf.image = image;
/* not allocate anything below the kernel */
kbuf.buf_min = kernel_load_addr + kernel_size;
+ /* load elf core header */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = prepare_elf_headers(&headers, &headers_sz);
+ if (ret) {
+ pr_err("Preparing elf core header failed\n");
+ goto out_err;
+ }
+
+ kbuf.buffer = headers;
+ kbuf.bufsz = headers_sz;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = headers_sz;
+ kbuf.buf_align = SZ_64K; /* largest supported page size */
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.top_down = true;
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ vfree(headers);
+ goto out_err;
+ }
+ image->arch.elf_headers = headers;
+ image->arch.elf_headers_mem = kbuf.mem;
+ image->arch.elf_headers_sz = headers_sz;
+
+ pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->arch.elf_headers_mem, kbuf.bufsz, kbuf.memsz);
+ }
+
/* load initrd */
if (initrd) {
kbuf.buffer = initrd;
@@ -209,7 +310,7 @@
initrd_load_addr = kbuf.mem;
pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- initrd_load_addr, initrd_len, initrd_len);
+ initrd_load_addr, kbuf.bufsz, kbuf.memsz);
}
/* load dtb */
@@ -236,11 +337,12 @@
image->arch.dtb_mem = kbuf.mem;
pr_debug("Loaded dtb at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- kbuf.mem, dtb_len, dtb_len);
+ kbuf.mem, kbuf.bufsz, kbuf.memsz);
return 0;
out_err:
+ image->nr_segments = orig_segments;
vfree(dtb);
return ret;
}
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index 426018e..2e22443 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -4,6 +4,7 @@
*/
#include <linux/elf.h>
+#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sort.h>
@@ -252,6 +253,40 @@
return ret;
}
+static bool branch_rela_needs_plt(Elf64_Sym *syms, Elf64_Rela *rela,
+ Elf64_Word dstidx)
+{
+
+ Elf64_Sym *s = syms + ELF64_R_SYM(rela->r_info);
+
+ if (s->st_shndx == dstidx)
+ return false;
+
+ return ELF64_R_TYPE(rela->r_info) == R_AARCH64_JUMP26 ||
+ ELF64_R_TYPE(rela->r_info) == R_AARCH64_CALL26;
+}
+
+/* Group branch PLT relas at the front end of the array. */
+static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela,
+ int numrels, Elf64_Word dstidx)
+{
+ int i = 0, j = numrels - 1;
+
+ if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+ return 0;
+
+ while (i < j) {
+ if (branch_rela_needs_plt(syms, &rela[i], dstidx))
+ i++;
+ else if (branch_rela_needs_plt(syms, &rela[j], dstidx))
+ swap(rela[i], rela[j]);
+ else
+ j--;
+ }
+
+ return i;
+}
+
int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
char *secstrings, struct module *mod)
{
@@ -288,7 +323,7 @@
for (i = 0; i < ehdr->e_shnum; i++) {
Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
- int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+ int nents, numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
if (sechdrs[i].sh_type != SHT_RELA)
@@ -298,8 +333,14 @@
if (!(dstsec->sh_flags & SHF_EXECINSTR))
continue;
- /* sort by type, symbol index and addend */
- sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+ /*
+ * sort branch relocations requiring a PLT by type, symbol index
+ * and addend
+ */
+ nents = partition_branch_plt_relas(syms, rels, numrels,
+ sechdrs[i].sh_info);
+ if (nents)
+ sort(rels, nents, sizeof(Elf64_Rela), cmp_rela, NULL);
if (!str_has_prefix(secstrings + dstsec->sh_name, ".init"))
core_plts += count_plts(syms, rels, numrels,
@@ -329,7 +370,7 @@
tramp->sh_type = SHT_NOBITS;
tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
tramp->sh_addralign = __alignof__(struct plt_entry);
- tramp->sh_size = sizeof(struct plt_entry);
+ tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
}
return 0;
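
The two-index partition above is compact enough to misread: entries that need a PLT are swapped to the front, and i ends up as the size of that front group, which is the only range sort() then touches. A standalone userspace model of the same loop, with ints standing in for Elf64_Rela entries:

#include <stdio.h>

static int needs_plt(int x) { return x != 0; }	/* stand-in predicate */

static int partition_plt(int *a, int n)
{
	int i = 0, j = n - 1;

	while (i < j) {
		if (needs_plt(a[i]))
			i++;			/* already in the front group */
		else if (needs_plt(a[j])) {
			int t = a[i];		/* pull a PLT rela forward */
			a[i] = a[j];
			a[j] = t;
		} else
			j--;			/* a[j] may stay at the back */
	}
	return i;				/* size of the front group */
}

int main(void)
{
	int rels[] = { 0, 1, 0, 1, 1 };

	printf("%d\n", partition_plt(rels, 5));	/* prints 3 */
	return 0;
}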
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 03ff15b..2a1ad95 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -9,6 +9,7 @@
#include <linux/bitops.h>
#include <linux/elf.h>
+#include <linux/ftrace.h>
#include <linux/gfp.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
@@ -314,21 +315,21 @@
/* MOVW instruction relocations. */
case R_AARCH64_MOVW_UABS_G0_NC:
overflow_check = false;
- /* Fall through */
+ fallthrough;
case R_AARCH64_MOVW_UABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
AARCH64_INSN_IMM_MOVKZ);
break;
case R_AARCH64_MOVW_UABS_G1_NC:
overflow_check = false;
- /* Fall through */
+ fallthrough;
case R_AARCH64_MOVW_UABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
AARCH64_INSN_IMM_MOVKZ);
break;
case R_AARCH64_MOVW_UABS_G2_NC:
overflow_check = false;
- /* Fall through */
+ fallthrough;
case R_AARCH64_MOVW_UABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
AARCH64_INSN_IMM_MOVKZ);
@@ -396,7 +397,7 @@
break;
case R_AARCH64_ADR_PREL_PG_HI21_NC:
overflow_check = false;
- /* Fall through */
+ fallthrough;
case R_AARCH64_ADR_PREL_PG_HI21:
ovf = reloc_insn_adrp(me, sechdrs, loc, val);
if (ovf && ovf != -ERANGE)
@@ -470,22 +471,58 @@
return -ENOEXEC;
}
-int module_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *me)
+static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ const char *name)
{
const Elf_Shdr *s, *se;
const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
- if (strcmp(".altinstructions", secstrs + s->sh_name) == 0)
- apply_alternatives_module((void *)s->sh_addr, s->sh_size);
-#ifdef CONFIG_ARM64_MODULE_PLTS
- if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
- !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name))
- me->arch.ftrace_trampoline = (void *)s->sh_addr;
-#endif
+ if (strcmp(name, secstrs + s->sh_name) == 0)
+ return s;
}
+ return NULL;
+}
+
+static inline void __init_plt(struct plt_entry *plt, unsigned long addr)
+{
+ *plt = get_plt_entry(addr, plt);
+}
+
+static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *mod)
+{
+#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE)
+ const Elf_Shdr *s;
+ struct plt_entry *plts;
+
+ s = find_section(hdr, sechdrs, ".text.ftrace_trampoline");
+ if (!s)
+ return -ENOEXEC;
+
+ plts = (void *)s->sh_addr;
+
+ __init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR);
+
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ __init_plt(&plts[FTRACE_REGS_PLT_IDX], FTRACE_REGS_ADDR);
+
+ mod->arch.ftrace_trampolines = plts;
+#endif
return 0;
}
+
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *me)
+{
+ const Elf_Shdr *s;
+ s = find_section(hdr, sechdrs, ".altinstructions");
+ if (s)
+ apply_alternatives_module((void *)s->sh_addr, s->sh_size);
+
+ return module_init_ftrace_plt(hdr, sechdrs, me);
+}
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
deleted file mode 100644
index 09a0eef..0000000
--- a/arch/arm64/kernel/module.lds
+++ /dev/null
@@ -1,5 +0,0 @@
-SECTIONS {
- .plt 0 (NOLOAD) : { BYTE(0) }
- .init.plt 0 (NOLOAD) : { BYTE(0) }
- .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) }
-}
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
new file mode 100644
index 0000000..7a66a7d
--- /dev/null
+++ b/arch/arm64/kernel/mte.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/prctl.h>
+#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/string.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/thread_info.h>
+#include <linux/uio.h>
+
+#include <asm/cpufeature.h>
+#include <asm/mte.h>
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+
+static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
+{
+ pte_t old_pte = READ_ONCE(*ptep);
+
+ if (check_swap && is_swap_pte(old_pte)) {
+ swp_entry_t entry = pte_to_swp_entry(old_pte);
+
+ if (!non_swap_entry(entry) && mte_restore_tags(entry, page))
+ return;
+ }
+
+ mte_clear_page_tags(page_address(page));
+}
+
+void mte_sync_tags(pte_t *ptep, pte_t pte)
+{
+ struct page *page = pte_page(pte);
+ long i, nr_pages = compound_nr(page);
+ bool check_swap = nr_pages == 1;
+
+ /* if PG_mte_tagged is set, tags have already been initialised */
+ for (i = 0; i < nr_pages; i++, page++) {
+ if (!test_and_set_bit(PG_mte_tagged, &page->flags))
+ mte_sync_page_tags(page, ptep, check_swap);
+ }
+}
+
+int memcmp_pages(struct page *page1, struct page *page2)
+{
+ char *addr1, *addr2;
+ int ret;
+
+ addr1 = page_address(page1);
+ addr2 = page_address(page2);
+ ret = memcmp(addr1, addr2, PAGE_SIZE);
+
+ if (!system_supports_mte() || ret)
+ return ret;
+
+ /*
+ * If the page content is identical but at least one of the pages is
+ * tagged, return non-zero to avoid KSM merging. If only one of the
+ * pages is tagged, set_pte_at() may zero or change the tags of the
+ * other page via mte_sync_tags().
+ */
+ if (test_bit(PG_mte_tagged, &page1->flags) ||
+ test_bit(PG_mte_tagged, &page2->flags))
+ return addr1 != addr2;
+
+ return ret;
+}
+
+static void update_sctlr_el1_tcf0(u64 tcf0)
+{
+ /* ISB required for the kernel uaccess routines */
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
+ isb();
+}
+
+static void set_sctlr_el1_tcf0(u64 tcf0)
+{
+ /*
+ * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
+ * optimisation. Disable preemption so that it does not see
+ * the variable update before the SCTLR_EL1.TCF0 one.
+ */
+ preempt_disable();
+ current->thread.sctlr_tcf0 = tcf0;
+ update_sctlr_el1_tcf0(tcf0);
+ preempt_enable();
+}
+
+static void update_gcr_el1_excl(u64 incl)
+{
+ u64 excl = ~incl & SYS_GCR_EL1_EXCL_MASK;
+
+ /*
+ * Note that 'incl' is an include mask (controlled by the user via
+ * prctl()) while GCR_EL1 accepts an exclude mask.
+ * No need for ISB since this only affects EL0 currently, implicit
+ * with ERET.
+ */
+ sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl);
+}
+
+static void set_gcr_el1_excl(u64 incl)
+{
+ current->thread.gcr_user_incl = incl;
+ update_gcr_el1_excl(incl);
+}
+
+void flush_mte_state(void)
+{
+ if (!system_supports_mte())
+ return;
+
+ /* clear any pending asynchronous tag fault */
+ dsb(ish);
+ write_sysreg_s(0, SYS_TFSRE0_EL1);
+ clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+ /* disable tag checking */
+ set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
+ /* reset tag generation mask */
+ set_gcr_el1_excl(0);
+}
+
+void mte_thread_switch(struct task_struct *next)
+{
+ if (!system_supports_mte())
+ return;
+
+ /* avoid expensive SCTLR_EL1 accesses if no change */
+ if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
+ update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
+ update_gcr_el1_excl(next->thread.gcr_user_incl);
+}
+
+void mte_suspend_exit(void)
+{
+ if (!system_supports_mte())
+ return;
+
+ update_gcr_el1_excl(current->thread.gcr_user_incl);
+}
+
+long set_mte_ctrl(struct task_struct *task, unsigned long arg)
+{
+ u64 tcf0;
+ u64 gcr_incl = (arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT;
+
+ if (!system_supports_mte())
+ return 0;
+
+ switch (arg & PR_MTE_TCF_MASK) {
+ case PR_MTE_TCF_NONE:
+ tcf0 = SCTLR_EL1_TCF0_NONE;
+ break;
+ case PR_MTE_TCF_SYNC:
+ tcf0 = SCTLR_EL1_TCF0_SYNC;
+ break;
+ case PR_MTE_TCF_ASYNC:
+ tcf0 = SCTLR_EL1_TCF0_ASYNC;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (task != current) {
+ task->thread.sctlr_tcf0 = tcf0;
+ task->thread.gcr_user_incl = gcr_incl;
+ } else {
+ set_sctlr_el1_tcf0(tcf0);
+ set_gcr_el1_excl(gcr_incl);
+ }
+
+ return 0;
+}
+
+long get_mte_ctrl(struct task_struct *task)
+{
+ unsigned long ret;
+
+ if (!system_supports_mte())
+ return 0;
+
+ ret = task->thread.gcr_user_incl << PR_MTE_TAG_SHIFT;
+
+ switch (task->thread.sctlr_tcf0) {
+ case SCTLR_EL1_TCF0_NONE:
+ ret |= PR_MTE_TCF_NONE;
+ break;
+ case SCTLR_EL1_TCF0_SYNC:
+ ret |= PR_MTE_TCF_SYNC;
+ break;
+ case SCTLR_EL1_TCF0_ASYNC:
+ ret |= PR_MTE_TCF_ASYNC;
+ break;
+ }
+
+ return ret;
+}
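
set_mte_ctrl() and get_mte_ctrl() sit behind the PR_SET/GET_TAGGED_ADDR_CTRL prctl()s. A hedged userspace sketch of opting in to synchronous tag checking with tags 0-3 included in the generation mask (the PR_MTE_* constants come from this series' uapi headers; the fallback defines below are only for illustration):

#include <sys/prctl.h>

#ifndef PR_MTE_TCF_SYNC
#define PR_SET_TAGGED_ADDR_CTRL 55
#define PR_TAGGED_ADDR_ENABLE   (1UL << 0)
#define PR_MTE_TCF_SYNC         (1UL << 1)
#define PR_MTE_TAG_SHIFT        3
#endif

static int enable_mte_sync(void)
{
        /* TCF mode and tag inclusion mask are decoded by set_mte_ctrl(). */
        unsigned long ctrl = PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC |
                             (0xfUL << PR_MTE_TAG_SHIFT);

        return prctl(PR_SET_TAGGED_ADDR_CTRL, ctrl, 0, 0, 0);
}
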
+
+/*
+ * Access MTE tags in another process' address space as given in mm. Update
+ * the number of tags copied. Return 0 if any tags copied, error otherwise.
+ * Inspired by __access_remote_vm().
+ */
+static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
+ struct iovec *kiov, unsigned int gup_flags)
+{
+ struct vm_area_struct *vma;
+ void __user *buf = kiov->iov_base;
+ size_t len = kiov->iov_len;
+ int ret;
+ int write = gup_flags & FOLL_WRITE;
+
+ if (!access_ok(buf, len))
+ return -EFAULT;
+
+ if (mmap_read_lock_killable(mm))
+ return -EIO;
+
+ while (len) {
+ unsigned long tags, offset;
+ void *maddr;
+ struct page *page = NULL;
+
+ ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page,
+ &vma, NULL);
+ if (ret <= 0)
+ break;
+
+ /*
+ * Only copy tags if the page has been mapped as PROT_MTE
+ * (PG_mte_tagged set). Otherwise the tags are not valid and
+ * not accessible to user. Moreover, an mprotect(PROT_MTE)
+ * would cause the existing tags to be cleared if the page
+ * was never mapped with PROT_MTE.
+ */
+ if (!(vma->vm_flags & VM_MTE)) {
+ ret = -EOPNOTSUPP;
+ put_page(page);
+ break;
+ }
+ WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags));
+
+ /* limit access to the end of the page */
+ offset = offset_in_page(addr);
+ tags = min(len, (PAGE_SIZE - offset) / MTE_GRANULE_SIZE);
+
+ maddr = page_address(page);
+ if (write) {
+ tags = mte_copy_tags_from_user(maddr + offset, buf, tags);
+ set_page_dirty_lock(page);
+ } else {
+ tags = mte_copy_tags_to_user(buf, maddr + offset, tags);
+ }
+ put_page(page);
+
+ /* error accessing the tracer's buffer */
+ if (!tags)
+ break;
+
+ len -= tags;
+ buf += tags;
+ addr += tags * MTE_GRANULE_SIZE;
+ }
+ mmap_read_unlock(mm);
+
+ /* return an error if no tags copied */
+ kiov->iov_len = buf - kiov->iov_base;
+ if (!kiov->iov_len) {
+ /* check for error accessing the tracee's address space */
+ if (ret <= 0)
+ return -EIO;
+ else
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy MTE tags in another process' address space at 'addr' to/from tracer's
+ * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm().
+ */
+static int access_remote_tags(struct task_struct *tsk, unsigned long addr,
+ struct iovec *kiov, unsigned int gup_flags)
+{
+ struct mm_struct *mm;
+ int ret;
+
+ mm = get_task_mm(tsk);
+ if (!mm)
+ return -EPERM;
+
+ if (!tsk->ptrace || (current != tsk->parent) ||
+ ((get_dumpable(mm) != SUID_DUMP_USER) &&
+ !ptracer_capable(tsk, mm->user_ns))) {
+ mmput(mm);
+ return -EPERM;
+ }
+
+ ret = __access_remote_tags(mm, addr, kiov, gup_flags);
+ mmput(mm);
+
+ return ret;
+}
+
+int mte_ptrace_copy_tags(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
+{
+ int ret;
+ struct iovec kiov;
+ struct iovec __user *uiov = (void __user *)data;
+ unsigned int gup_flags = FOLL_FORCE;
+
+ if (!system_supports_mte())
+ return -EIO;
+
+ if (get_user(kiov.iov_base, &uiov->iov_base) ||
+ get_user(kiov.iov_len, &uiov->iov_len))
+ return -EFAULT;
+
+ if (request == PTRACE_POKEMTETAGS)
+ gup_flags |= FOLL_WRITE;
+
+ /* align addr to the MTE tag granule */
+ addr &= MTE_GRANULE_MASK;
+
+ ret = access_remote_tags(child, addr, &kiov, gup_flags);
+ if (!ret)
+ ret = put_user(kiov.iov_len, &uiov->iov_len);
+
+ return ret;
+}
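
mte_ptrace_copy_tags() services the new PTRACE_PEEKMTETAGS/PTRACE_POKEMTETAGS requests: one tag byte per 16-byte granule travels through the tracer-supplied iovec, and iov_len is written back with the count actually copied. A sketch of the tracer side (the request number matches this series' arm64 uapi; the fallback define is illustrative):

#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

#ifndef PTRACE_PEEKMTETAGS
#define PTRACE_PEEKMTETAGS 33
#endif

static long peek_tags(pid_t pid, void *addr, unsigned char *tags, size_t len)
{
        struct iovec iov = { .iov_base = tags, .iov_len = len };
        long ret = ptrace(PTRACE_PEEKMTETAGS, pid, addr, &iov);

        /* On success iov.iov_len holds the number of tags copied. */
        return ret ? ret : (long)iov.iov_len;
}
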
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 4cfed91..c07d7a0 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -6,13 +6,157 @@
* Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
*/
+#define pr_fmt(fmt) "arm-pv: " fmt
+
+#include <linux/arm-smccc.h>
+#include <linux/cpuhotplug.h>
#include <linux/export.h>
+#include <linux/io.h>
#include <linux/jump_label.h>
+#include <linux/printk.h>
+#include <linux/psci.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
#include <linux/types.h>
+
#include <asm/paravirt.h>
+#include <asm/pvclock-abi.h>
+#include <asm/smp_plat.h>
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
struct paravirt_patch_template pv_ops;
EXPORT_SYMBOL_GPL(pv_ops);
+
+struct pv_time_stolen_time_region {
+ struct pvclock_vcpu_stolen_time *kaddr;
+};
+
+static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
+
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+ steal_acc = false;
+ return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+
+/* return stolen time in ns by asking the hypervisor */
+static u64 pv_steal_clock(int cpu)
+{
+ struct pv_time_stolen_time_region *reg;
+
+ reg = per_cpu_ptr(&stolen_time_region, cpu);
+
+ /*
+ * paravirt_steal_clock() may be called before the CPU
+ * online notification callback runs. Until the callback
+ * has run we just return zero.
+ */
+ if (!reg->kaddr)
+ return 0;
+
+ return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+}
+
+static int stolen_time_cpu_down_prepare(unsigned int cpu)
+{
+ struct pv_time_stolen_time_region *reg;
+
+ reg = this_cpu_ptr(&stolen_time_region);
+ if (!reg->kaddr)
+ return 0;
+
+ memunmap(reg->kaddr);
+ memset(reg, 0, sizeof(*reg));
+
+ return 0;
+}
+
+static int stolen_time_cpu_online(unsigned int cpu)
+{
+ struct pv_time_stolen_time_region *reg;
+ struct arm_smccc_res res;
+
+ reg = this_cpu_ptr(&stolen_time_region);
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res);
+
+ if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
+ return -EINVAL;
+
+ reg->kaddr = memremap(res.a0,
+ sizeof(struct pvclock_vcpu_stolen_time),
+ MEMREMAP_WB);
+
+ if (!reg->kaddr) {
+ pr_warn("Failed to map stolen time data structure\n");
+ return -ENOMEM;
+ }
+
+ if (le32_to_cpu(reg->kaddr->revision) != 0 ||
+ le32_to_cpu(reg->kaddr->attributes) != 0) {
+ pr_warn_once("Unexpected revision or attributes in stolen time data\n");
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+static int __init pv_time_init_stolen_time(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "hypervisor/arm/pvtime:online",
+ stolen_time_cpu_online,
+ stolen_time_cpu_down_prepare);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+static bool __init has_pv_steal_clock(void)
+{
+ struct arm_smccc_res res;
+
+ /* To detect the presence of PV time support we require SMCCC 1.1+ */
+ if (arm_smccc_1_1_get_conduit() == SMCCC_CONDUIT_NONE)
+ return false;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_HV_PV_TIME_FEATURES, &res);
+
+ if (res.a0 != SMCCC_RET_SUCCESS)
+ return false;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES,
+ ARM_SMCCC_HV_PV_TIME_ST, &res);
+
+ return (res.a0 == SMCCC_RET_SUCCESS);
+}
+
+int __init pv_time_init(void)
+{
+ int ret;
+
+ if (!has_pv_steal_clock())
+ return 0;
+
+ ret = pv_time_init_stolen_time();
+ if (ret)
+ return ret;
+
+ pv_ops.time.steal_clock = pv_steal_clock;
+
+ static_key_slow_inc(&paravirt_steal_enabled);
+ if (steal_acc)
+ static_key_slow_inc(&paravirt_steal_rq_enabled);
+
+ pr_info("using stolen time PV\n");
+
+ return 0;
+}
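
For context, pv_time_init() only installs the hook; the consumer is the generic paravirt_steal_clock() wrapper, which the scheduler samples to subtract stolen time from task runtime. A minimal sketch of that consumer side (it mirrors the kernel/sched usage pattern and is not part of this patch):

/* Sketch: a scheduler-style consumer charging only the delta. */
static u64 demo_steal_delta(int cpu, u64 *prev)
{
        u64 steal = paravirt_steal_clock(cpu); /* calls pv_steal_clock() */
        u64 delta = steal - *prev;

        *prev = steal;
        return delta; /* nanoseconds stolen since the last sample */
}
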
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 570988c..1006ed2 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -117,7 +117,7 @@
struct device *dev = &root->device->dev;
struct resource *bus_res = &root->secondary;
u16 seg = root->segment;
- struct pci_ecam_ops *ecam_ops;
+ const struct pci_ecam_ops *ecam_ops;
struct resource cfgres;
struct acpi_device *adev;
struct pci_config_window *cfg;
@@ -185,7 +185,7 @@
root_ops->release_info = pci_acpi_generic_release_info;
root_ops->prepare_resources = pci_acpi_root_prepare_resources;
- root_ops->pci_ops = &ri->cfg->ops->pci_ops;
+ root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops;
bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
if (!bus)
return NULL;
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index b0e03e0..58ae55d 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -102,7 +102,9 @@
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+
+ if (guest_cbs && guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
@@ -137,19 +139,20 @@
* whilst unwinding the stackframe and is like a subroutine return so we use
* the PC.
*/
-static int callchain_trace(struct stackframe *frame, void *data)
+static bool callchain_trace(void *data, unsigned long pc)
{
struct perf_callchain_entry_ctx *entry = data;
- perf_callchain_store(entry, frame->pc);
- return 0;
+ perf_callchain_store(entry, pc);
+ return true;
}
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
+ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
struct stackframe frame;
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ if (guest_cbs && guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
@@ -160,18 +163,21 @@
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
- return perf_guest_cbs->get_guest_ip();
+ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+
+ if (guest_cbs && guest_cbs->is_in_guest())
+ return guest_cbs->get_guest_ip();
return instruction_pointer(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
{
+ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
int misc = 0;
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
- if (perf_guest_cbs->is_user_mode())
+ if (guest_cbs && guest_cbs->is_in_guest()) {
+ if (guest_cbs->is_user_mode())
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 19128d9..cdb3d45 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -13,12 +13,15 @@
#include <asm/sysreg.h>
#include <asm/virt.h>
+#include <clocksource/arm_arch_timer.h>
+
#include <linux/acpi.h>
#include <linux/clocksource.h>
#include <linux/kvm_host.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
+#include <linux/sched_clock.h>
#include <linux/smp.h>
/* ARMv8 Cortex-A53 specific event types. */
@@ -66,6 +69,9 @@
[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD,
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_RD,
+
[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
[C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
};
@@ -158,133 +164,93 @@
return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
}
-#define ARMV8_EVENT_ATTR(name, config) \
- PMU_EVENT_ATTR(name, armv8_event_attr_##name, \
- config, armv8pmu_events_sysfs_show)
-
-ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR);
-ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE);
-ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL);
-ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED);
-ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED);
-ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED);
-ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN);
-ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN);
-ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED);
-ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED);
-ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED);
-ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED);
-ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES);
-ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED);
-ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS);
-ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE);
-ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB);
-ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE);
-ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB);
-ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS);
-ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR);
-ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC);
-ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES);
-/* Don't expose the chain event in /sys, since it's useless in isolation */
-ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED);
-ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED);
-ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND);
-ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND);
-ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB);
-ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB);
-ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE);
-ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE);
-ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB);
-ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL);
-ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL);
-ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB);
-ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB);
-ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS);
-ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE);
-ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS);
-ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK);
-ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK);
-ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD);
-ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD);
-ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD);
-ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP);
-ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED);
-ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE);
-ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION);
+#define ARMV8_EVENT_ATTR(name, config) \
+ (&((struct perf_pmu_events_attr) { \
+ .attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL), \
+ .id = config, \
+ }).attr.attr)
static struct attribute *armv8_pmuv3_event_attrs[] = {
- &armv8_event_attr_sw_incr.attr.attr,
- &armv8_event_attr_l1i_cache_refill.attr.attr,
- &armv8_event_attr_l1i_tlb_refill.attr.attr,
- &armv8_event_attr_l1d_cache_refill.attr.attr,
- &armv8_event_attr_l1d_cache.attr.attr,
- &armv8_event_attr_l1d_tlb_refill.attr.attr,
- &armv8_event_attr_ld_retired.attr.attr,
- &armv8_event_attr_st_retired.attr.attr,
- &armv8_event_attr_inst_retired.attr.attr,
- &armv8_event_attr_exc_taken.attr.attr,
- &armv8_event_attr_exc_return.attr.attr,
- &armv8_event_attr_cid_write_retired.attr.attr,
- &armv8_event_attr_pc_write_retired.attr.attr,
- &armv8_event_attr_br_immed_retired.attr.attr,
- &armv8_event_attr_br_return_retired.attr.attr,
- &armv8_event_attr_unaligned_ldst_retired.attr.attr,
- &armv8_event_attr_br_mis_pred.attr.attr,
- &armv8_event_attr_cpu_cycles.attr.attr,
- &armv8_event_attr_br_pred.attr.attr,
- &armv8_event_attr_mem_access.attr.attr,
- &armv8_event_attr_l1i_cache.attr.attr,
- &armv8_event_attr_l1d_cache_wb.attr.attr,
- &armv8_event_attr_l2d_cache.attr.attr,
- &armv8_event_attr_l2d_cache_refill.attr.attr,
- &armv8_event_attr_l2d_cache_wb.attr.attr,
- &armv8_event_attr_bus_access.attr.attr,
- &armv8_event_attr_memory_error.attr.attr,
- &armv8_event_attr_inst_spec.attr.attr,
- &armv8_event_attr_ttbr_write_retired.attr.attr,
- &armv8_event_attr_bus_cycles.attr.attr,
- &armv8_event_attr_l1d_cache_allocate.attr.attr,
- &armv8_event_attr_l2d_cache_allocate.attr.attr,
- &armv8_event_attr_br_retired.attr.attr,
- &armv8_event_attr_br_mis_pred_retired.attr.attr,
- &armv8_event_attr_stall_frontend.attr.attr,
- &armv8_event_attr_stall_backend.attr.attr,
- &armv8_event_attr_l1d_tlb.attr.attr,
- &armv8_event_attr_l1i_tlb.attr.attr,
- &armv8_event_attr_l2i_cache.attr.attr,
- &armv8_event_attr_l2i_cache_refill.attr.attr,
- &armv8_event_attr_l3d_cache_allocate.attr.attr,
- &armv8_event_attr_l3d_cache_refill.attr.attr,
- &armv8_event_attr_l3d_cache.attr.attr,
- &armv8_event_attr_l3d_cache_wb.attr.attr,
- &armv8_event_attr_l2d_tlb_refill.attr.attr,
- &armv8_event_attr_l2i_tlb_refill.attr.attr,
- &armv8_event_attr_l2d_tlb.attr.attr,
- &armv8_event_attr_l2i_tlb.attr.attr,
- &armv8_event_attr_remote_access.attr.attr,
- &armv8_event_attr_ll_cache.attr.attr,
- &armv8_event_attr_ll_cache_miss.attr.attr,
- &armv8_event_attr_dtlb_walk.attr.attr,
- &armv8_event_attr_itlb_walk.attr.attr,
- &armv8_event_attr_ll_cache_rd.attr.attr,
- &armv8_event_attr_ll_cache_miss_rd.attr.attr,
- &armv8_event_attr_remote_access_rd.attr.attr,
- &armv8_event_attr_sample_pop.attr.attr,
- &armv8_event_attr_sample_feed.attr.attr,
- &armv8_event_attr_sample_filtrate.attr.attr,
- &armv8_event_attr_sample_collision.attr.attr,
+ ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
+ ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE),
+ ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL),
+ ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED),
+ ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED),
+ ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED),
+ ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN),
+ ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN),
+ ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED),
+ ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED),
+ ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED),
+ ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED),
+ ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES),
+ ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED),
+ ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS),
+ ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE),
+ ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB),
+ ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE),
+ ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB),
+ ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS),
+ ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR),
+ ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC),
+ ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES),
+ /* Don't expose the chain event in /sys, since it's useless in isolation */
+ ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED),
+ ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED),
+ ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND),
+ ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND),
+ ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB),
+ ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB),
+ ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE),
+ ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE),
+ ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB),
+ ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB),
+ ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB),
+ ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS),
+ ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE),
+ ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS),
+ ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK),
+ ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK),
+ ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD),
+ ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD),
+ ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD),
+ ARMV8_EVENT_ATTR(l1d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD),
+ ARMV8_EVENT_ATTR(op_retired, ARMV8_PMUV3_PERFCTR_OP_RETIRED),
+ ARMV8_EVENT_ATTR(op_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC),
+ ARMV8_EVENT_ATTR(stall, ARMV8_PMUV3_PERFCTR_STALL),
+ ARMV8_EVENT_ATTR(stall_slot_backend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND),
+ ARMV8_EVENT_ATTR(stall_slot_frontend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND),
+ ARMV8_EVENT_ATTR(stall_slot, ARMV8_PMUV3_PERFCTR_STALL_SLOT),
+ ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP),
+ ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED),
+ ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE),
+ ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION),
+ ARMV8_EVENT_ATTR(cnt_cycles, ARMV8_AMU_PERFCTR_CNT_CYCLES),
+ ARMV8_EVENT_ATTR(stall_backend_mem, ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM),
+ ARMV8_EVENT_ATTR(l1i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS),
+ ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD),
+ ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS),
+ ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD),
+ ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT),
+ ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT),
+ ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT),
+ ARMV8_EVENT_ATTR(mem_access_checked, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED),
+ ARMV8_EVENT_ATTR(mem_access_checked_rd, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD),
+ ARMV8_EVENT_ATTR(mem_access_checked_wr, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR),
NULL,
};
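
The rewritten ARMV8_EVENT_ATTR builds each attribute from a file-scope compound literal, which in C99 has static storage duration, so taking its address in a static initializer is legal and the dozens of named armv8_event_attr_* globals disappear. A minimal standalone illustration of the idiom (hypothetical types and names):

struct demo_attr {
        const char *name;
        unsigned long id;
};

#define DEMO_ATTR(n, cfg) \
        (&((struct demo_attr) { .name = #n, .id = cfg }))

static struct demo_attr *demo_attrs[] = {
        DEMO_ATTR(cpu_cycles, 0x11),   /* each entry points at its own  */
        DEMO_ATTR(inst_retired, 0x08), /* file-scope compound literal   */
        NULL,
};
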
@@ -339,13 +305,44 @@
.attrs = armv8_pmuv3_format_attrs,
};
+static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+ u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK;
+
+ return sysfs_emit(page, "0x%08x\n", slots);
+}
+
+static DEVICE_ATTR_RO(slots);
+
+static struct attribute *armv8_pmuv3_caps_attrs[] = {
+ &dev_attr_slots.attr,
+ NULL,
+};
+
+static struct attribute_group armv8_pmuv3_caps_attr_group = {
+ .name = "caps",
+ .attrs = armv8_pmuv3_caps_attrs,
+};
+
/*
* Perf Events' indices
*/
#define ARMV8_IDX_CYCLE_COUNTER 0
#define ARMV8_IDX_COUNTER0 1
-#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
- (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+
+/*
+ * We unconditionally enable ARMv8.5-PMU long event counter support
+ * (64-bit events) where supported. Indicate if this arm_pmu has long
+ * event counter support.
+ */
+static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu)
+{
+ return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_5);
+}
/*
* We must chain two programmable counters for 64 bit events,
@@ -356,9 +353,11 @@
static inline bool armv8pmu_event_is_chained(struct perf_event *event)
{
int idx = event->hw.idx;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
return !WARN_ON(idx < 0) &&
armv8pmu_event_is_64bit(event) &&
+ !armv8pmu_has_long_event(cpu_pmu) &&
(idx != ARMV8_IDX_CYCLE_COUNTER);
}
@@ -372,6 +371,73 @@
#define ARMV8_IDX_TO_COUNTER(x) \
(((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK)
+/*
+ * Dispatch a runtime counter index to the matching PMEVCNTR<n>_EL0 /
+ * PMEVTYPER<n>_EL0 register: the sysreg accessors need the register name
+ * as a compile-time immediate, hence the switch over every legal index.
+ */
+
+#define PMEVN_CASE(n, case_macro) \
+ case n: case_macro(n); break
+
+#define PMEVN_SWITCH(x, case_macro) \
+ do { \
+ switch (x) { \
+ PMEVN_CASE(0, case_macro); \
+ PMEVN_CASE(1, case_macro); \
+ PMEVN_CASE(2, case_macro); \
+ PMEVN_CASE(3, case_macro); \
+ PMEVN_CASE(4, case_macro); \
+ PMEVN_CASE(5, case_macro); \
+ PMEVN_CASE(6, case_macro); \
+ PMEVN_CASE(7, case_macro); \
+ PMEVN_CASE(8, case_macro); \
+ PMEVN_CASE(9, case_macro); \
+ PMEVN_CASE(10, case_macro); \
+ PMEVN_CASE(11, case_macro); \
+ PMEVN_CASE(12, case_macro); \
+ PMEVN_CASE(13, case_macro); \
+ PMEVN_CASE(14, case_macro); \
+ PMEVN_CASE(15, case_macro); \
+ PMEVN_CASE(16, case_macro); \
+ PMEVN_CASE(17, case_macro); \
+ PMEVN_CASE(18, case_macro); \
+ PMEVN_CASE(19, case_macro); \
+ PMEVN_CASE(20, case_macro); \
+ PMEVN_CASE(21, case_macro); \
+ PMEVN_CASE(22, case_macro); \
+ PMEVN_CASE(23, case_macro); \
+ PMEVN_CASE(24, case_macro); \
+ PMEVN_CASE(25, case_macro); \
+ PMEVN_CASE(26, case_macro); \
+ PMEVN_CASE(27, case_macro); \
+ PMEVN_CASE(28, case_macro); \
+ PMEVN_CASE(29, case_macro); \
+ PMEVN_CASE(30, case_macro); \
+ default: WARN(1, "Invalid PMEV* index\n"); \
+ } \
+ } while (0)
+
+#define RETURN_READ_PMEVCNTRN(n) \
+ return read_sysreg(pmevcntr##n##_el0)
+static unsigned long read_pmevcntrn(int n)
+{
+ PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
+ return 0;
+}
+
+#define WRITE_PMEVCNTRN(n) \
+ write_sysreg(val, pmevcntr##n##_el0)
+static void write_pmevcntrn(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
+}
+
+#define WRITE_PMEVTYPERN(n) \
+ write_sysreg(val, pmevtyper##n##_el0)
+static void write_pmevtypern(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
+}
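
The PMEVN_SWITCH() machinery exists because read_sysreg()/write_sysreg() encode the register name into the MRS/MSR instruction itself, so a runtime index cannot be passed through. Expanded by hand for clarity, read_pmevcntrn() behaves as if written (sketch, cases elided):

static unsigned long read_pmevcntrn_expanded(int n)
{
        switch (n) {
        case 0: return read_sysreg(pmevcntr0_el0);
        case 1: return read_sysreg(pmevcntr1_el0);
        /* ... one case per counter, up to 30 ... */
        default:
                WARN(1, "Invalid PMEV* index\n");
                return 0;
        }
}
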
+
static inline u32 armv8pmu_pmcr_read(void)
{
return read_sysreg(pmcr_el0);
@@ -389,28 +455,16 @@
return pmovsr & ARMV8_PMU_OVERFLOWED_MASK;
}
-static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx)
-{
- return idx >= ARMV8_IDX_CYCLE_COUNTER &&
- idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu);
-}
-
static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
{
return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
}
-static inline void armv8pmu_select_counter(int idx)
+static inline u64 armv8pmu_read_evcntr(int idx)
{
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(counter, pmselr_el0);
- isb();
-}
-static inline u32 armv8pmu_read_evcntr(int idx)
-{
- armv8pmu_select_counter(idx);
- return read_sysreg(pmxevcntr_el0);
+ return read_pmevcntrn(counter);
}
static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
@@ -424,28 +478,63 @@
return val;
}
-static u64 armv8pmu_read_counter(struct perf_event *event)
+/*
+ * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP
+ * is set, the event counters also become 64-bit counters. Unless the
+ * user has requested a long counter (attr.config1), we want to
+ * interrupt upon 32-bit overflow, which we achieve by applying a bias.
+ */
+static bool armv8pmu_event_needs_bias(struct perf_event *event)
{
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- u64 value = 0;
- if (!armv8pmu_counter_valid(cpu_pmu, idx))
- pr_err("CPU%u reading wrong counter %d\n",
- smp_processor_id(), idx);
- else if (idx == ARMV8_IDX_CYCLE_COUNTER)
- value = read_sysreg(pmccntr_el0);
- else
- value = armv8pmu_read_hw_counter(event);
+ if (armv8pmu_event_is_64bit(event))
+ return false;
+
+ if (armv8pmu_has_long_event(cpu_pmu) ||
+ idx == ARMV8_IDX_CYCLE_COUNTER)
+ return true;
+
+ return false;
+}
+
+static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value)
+{
+ if (armv8pmu_event_needs_bias(event))
+ value |= GENMASK(63, 32);
return value;
}
-static inline void armv8pmu_write_evcntr(int idx, u32 value)
+static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value)
{
- armv8pmu_select_counter(idx);
- write_sysreg(value, pmxevcntr_el0);
+ if (armv8pmu_event_needs_bias(event))
+ value &= ~GENMASK(63, 32);
+
+ return value;
+}
+
+static u64 armv8pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u64 value = 0;
+
+ if (idx == ARMV8_IDX_CYCLE_COUNTER)
+ value = read_sysreg(pmccntr_el0);
+ else
+ value = armv8pmu_read_hw_counter(event);
+
+ return armv8pmu_unbias_long_counter(event, value);
+}
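
The bias replaces the old open-coded upper-bits write: for a 32-bit event on what is physically a 64-bit counter, ORing in GENMASK(63, 32) makes the hardware counter wrap (and interrupt) exactly when the low 32 bits overflow. A worked sketch for a period of 0x1000 events (values illustrative):

static u64 demo_biased_start(void)
{
        /* armpmu_event_set_period() programs -period in the low 32 bits. */
        u64 value = (0ULL - 0x1000) & GENMASK(31, 0); /* 0xfffff000 */

        value |= GENMASK(63, 32); /* 0xfffffffffffff000 after bias */
        /* 0x1000 increments later the counter hits 2^64 and interrupts. */
        return value;
}
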
+
+static inline void armv8pmu_write_evcntr(int idx, u64 value)
+{
+ u32 counter = ARMV8_IDX_TO_COUNTER(idx);
+
+ write_pmevcntrn(counter, value);
}
static inline void armv8pmu_write_hw_counter(struct perf_event *event,
@@ -463,32 +552,23 @@
static void armv8pmu_write_counter(struct perf_event *event, u64 value)
{
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- if (!armv8pmu_counter_valid(cpu_pmu, idx))
- pr_err("CPU%u writing wrong counter %d\n",
- smp_processor_id(), idx);
- else if (idx == ARMV8_IDX_CYCLE_COUNTER) {
- /*
- * The cycles counter is really a 64-bit counter.
- * When treating it as a 32-bit counter, we only count
- * the lower 32 bits, and set the upper 32-bits so that
- * we get an interrupt upon 32-bit overflow.
- */
- if (!armv8pmu_event_is_64bit(event))
- value |= 0xffffffff00000000ULL;
+ value = armv8pmu_bias_long_counter(event, value);
+
+ if (idx == ARMV8_IDX_CYCLE_COUNTER)
write_sysreg(value, pmccntr_el0);
- } else
+ else
armv8pmu_write_hw_counter(event, value);
}
static inline void armv8pmu_write_evtype(int idx, u32 val)
{
- armv8pmu_select_counter(idx);
+ u32 counter = ARMV8_IDX_TO_COUNTER(idx);
+
val &= ARMV8_PMU_EVTYPE_MASK;
- write_sysreg(val, pmxevtyper_el0);
+ write_pmevtypern(counter, val);
}
static inline void armv8pmu_write_event_type(struct perf_event *event)
@@ -508,90 +588,91 @@
armv8pmu_write_evtype(idx - 1, hwc->config_base);
armv8pmu_write_evtype(idx, chain_evt);
} else {
- armv8pmu_write_evtype(idx, hwc->config_base);
+ if (idx == ARMV8_IDX_CYCLE_COUNTER)
+ write_sysreg(hwc->config_base, pmccfiltr_el0);
+ else
+ armv8pmu_write_evtype(idx, hwc->config_base);
}
}
-static inline int armv8pmu_enable_counter(int idx)
+static u32 armv8pmu_event_cnten_mask(struct perf_event *event)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(BIT(counter), pmcntenset_el0);
- return idx;
+ int counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
+ u32 mask = BIT(counter);
+
+ if (armv8pmu_event_is_chained(event))
+ mask |= BIT(counter - 1);
+ return mask;
+}
+
+static inline void armv8pmu_enable_counter(u32 mask)
+{
+ /*
+ * Make sure event configuration register writes are visible before we
+ * enable the counter.
+ */
+ isb();
+ write_sysreg(mask, pmcntenset_el0);
}
static inline void armv8pmu_enable_event_counter(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
- int idx = event->hw.idx;
- u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx));
+ u32 mask = armv8pmu_event_cnten_mask(event);
- if (armv8pmu_event_is_chained(event))
- counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1));
-
- kvm_set_pmu_events(counter_bits, attr);
+ kvm_set_pmu_events(mask, attr);
/* We rely on the hypervisor switch code to enable guest counters */
- if (!kvm_pmu_counter_deferred(attr)) {
- armv8pmu_enable_counter(idx);
- if (armv8pmu_event_is_chained(event))
- armv8pmu_enable_counter(idx - 1);
- }
+ if (!kvm_pmu_counter_deferred(attr))
+ armv8pmu_enable_counter(mask);
}
-static inline int armv8pmu_disable_counter(int idx)
+static inline void armv8pmu_disable_counter(u32 mask)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(BIT(counter), pmcntenclr_el0);
- return idx;
+ write_sysreg(mask, pmcntenclr_el0);
+ /*
+ * Make sure the effects of disabling the counter are visible before we
+ * start configuring the event.
+ */
+ isb();
}
static inline void armv8pmu_disable_event_counter(struct perf_event *event)
{
- struct hw_perf_event *hwc = &event->hw;
struct perf_event_attr *attr = &event->attr;
- int idx = hwc->idx;
- u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx));
+ u32 mask = armv8pmu_event_cnten_mask(event);
- if (armv8pmu_event_is_chained(event))
- counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1));
-
- kvm_clr_pmu_events(counter_bits);
+ kvm_clr_pmu_events(mask);
/* We rely on the hypervisor switch code to disable guest counters */
- if (!kvm_pmu_counter_deferred(attr)) {
- if (armv8pmu_event_is_chained(event))
- armv8pmu_disable_counter(idx - 1);
- armv8pmu_disable_counter(idx);
- }
+ if (!kvm_pmu_counter_deferred(attr))
+ armv8pmu_disable_counter(mask);
}
-static inline int armv8pmu_enable_intens(int idx)
+static inline void armv8pmu_enable_intens(u32 mask)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(BIT(counter), pmintenset_el1);
- return idx;
+ write_sysreg(mask, pmintenset_el1);
}
-static inline int armv8pmu_enable_event_irq(struct perf_event *event)
+static inline void armv8pmu_enable_event_irq(struct perf_event *event)
{
- return armv8pmu_enable_intens(event->hw.idx);
+ u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
+ armv8pmu_enable_intens(BIT(counter));
}
-static inline int armv8pmu_disable_intens(int idx)
+static inline void armv8pmu_disable_intens(u32 mask)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(BIT(counter), pmintenclr_el1);
+ write_sysreg(mask, pmintenclr_el1);
isb();
/* Clear the overflow flag in case an interrupt is pending. */
- write_sysreg(BIT(counter), pmovsclr_el0);
+ write_sysreg(mask, pmovsclr_el0);
isb();
-
- return idx;
}
-static inline int armv8pmu_disable_event_irq(struct perf_event *event)
+static inline void armv8pmu_disable_event_irq(struct perf_event *event)
{
- return armv8pmu_disable_intens(event->hw.idx);
+ u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
+ armv8pmu_disable_intens(BIT(counter));
}
static inline u32 armv8pmu_getreset_flags(void)
@@ -610,15 +691,10 @@
static void armv8pmu_enable_event(struct perf_event *event)
{
- unsigned long flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
/*
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/*
* Disable counter
@@ -626,7 +702,7 @@
armv8pmu_disable_event_counter(event);
/*
- * Set event (if destined for PMNx counters).
+ * Set event.
*/
armv8pmu_write_event_type(event);
@@ -639,21 +715,10 @@
* Enable counter
*/
armv8pmu_enable_event_counter(event);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv8pmu_disable_event(struct perf_event *event)
{
- unsigned long flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- /*
- * Disable counter and interrupt
- */
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
/*
* Disable counter
*/
@@ -663,30 +728,18 @@
* Disable interrupt for this counter
*/
armv8pmu_disable_event_irq(event);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv8pmu_start(struct arm_pmu *cpu_pmu)
{
- unsigned long flags;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Enable all counters */
armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
{
- unsigned long flags;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable all counters */
armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
@@ -739,20 +792,16 @@
if (!armpmu_event_set_period(event))
continue;
+ /*
+ * Perf event overflow will queue the processing of the event as
+ * an irq_work which will be taken care of in the handling of
+ * IPI_IRQ_WORK.
+ */
if (perf_event_overflow(event, &data, regs))
cpu_pmu->disable(event);
}
armv8pmu_start(cpu_pmu);
- /*
- * Handle the pending perf events.
- *
- * Note: this call *must* be run with interrupts disabled. For
- * platforms that can have the PMU interrupts raised as an NMI, this
- * will not work.
- */
- irq_work_run();
-
return IRQ_HANDLED;
}
@@ -805,7 +854,8 @@
/*
* Otherwise use events counters
*/
- if (armv8pmu_event_is_64bit(event))
+ if (armv8pmu_event_is_64bit(event) &&
+ !armv8pmu_has_long_event(cpu_pmu))
return armv8pmu_get_chain_idx(cpuc, cpu_pmu);
else
return armv8pmu_get_single_idx(cpuc, cpu_pmu);
@@ -877,13 +927,11 @@
static void armv8pmu_reset(void *info)
{
struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
- u32 idx, nb_cnt = cpu_pmu->num_events;
+ u32 pmcr;
/* The counter and interrupt enable registers are unknown at reset. */
- for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
- armv8pmu_disable_counter(idx);
- armv8pmu_disable_intens(idx);
- }
+ armv8pmu_disable_counter(U32_MAX);
+ armv8pmu_disable_intens(U32_MAX);
/* Clear the counters we flip at guest entry/exit */
kvm_clr_pmu_events(U32_MAX);
@@ -892,8 +940,13 @@
* Initialize & Reset PMNC. Request overflow interrupt for
* 64 bit cycle counter but cheat in armv8pmu_write_counter().
*/
- armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C |
- ARMV8_PMU_PMCR_LC);
+ pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC;
+
+ /* Enable long event counter support where available */
+ if (armv8pmu_has_long_event(cpu_pmu))
+ pmcr |= ARMV8_PMU_PMCR_LP;
+
+ armv8pmu_pmcr_write(pmcr);
}
static int __armv8_pmuv3_map_event(struct perf_event *event,
@@ -976,6 +1029,7 @@
if (pmuver == 0xf || pmuver == 0)
return;
+ cpu_pmu->pmuver = pmuver;
probe->present = true;
/* Read the nb of CNTx counters supported from PMNC */
@@ -996,6 +1050,12 @@
bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap,
pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
+
+ /* store PMMIR_EL1 register for sysfs */
+ if (pmuver >= ID_AA64DFR0_PMUVER_8_4 && (pmceid_raw[1] & BIT(31)))
+ cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1);
+ else
+ cpu_pmu->reg_pmmir = 0;
}
static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
@@ -1015,7 +1075,11 @@
return probe.present ? 0 : -ENODEV;
}
-static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
+static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
+ int (*map_event)(struct perf_event *event),
+ const struct attribute_group *events,
+ const struct attribute_group *format,
+ const struct attribute_group *caps)
{
int ret = armv8pmu_probe_pmu(cpu_pmu);
if (ret)
@@ -1034,144 +1098,135 @@
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
cpu_pmu->filter_match = armv8pmu_filter_match;
+ cpu_pmu->name = name;
+ cpu_pmu->map_event = map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ?
+ events : &armv8_pmuv3_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ?
+ format : &armv8_pmuv3_format_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ?
+ caps : &armv8_pmuv3_caps_attr_group;
+
return 0;
}
+static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name,
+ int (*map_event)(struct perf_event *event))
+{
+ return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL);
+}
+
static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_pmuv3",
+ armv8_pmuv3_map_event);
+}
- cpu_pmu->name = "armv8_pmuv3";
- cpu_pmu->map_event = armv8_pmuv3_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
+static int armv8_a34_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a34",
+ armv8_pmuv3_map_event);
}
static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cortex_a35";
- cpu_pmu->map_event = armv8_a53_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35",
+ armv8_a53_map_event);
}
static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53",
+ armv8_a53_map_event);
+}
- cpu_pmu->name = "armv8_cortex_a53";
- cpu_pmu->map_event = armv8_a53_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
+static int armv8_a55_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a55",
+ armv8_pmuv3_map_event);
}
static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57",
+ armv8_a57_map_event);
+}
- cpu_pmu->name = "armv8_cortex_a57";
- cpu_pmu->map_event = armv8_a57_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
+static int armv8_a65_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a65",
+ armv8_pmuv3_map_event);
}
static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cortex_a72";
- cpu_pmu->map_event = armv8_a57_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72",
+ armv8_a57_map_event);
}
static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73",
+ armv8_a73_map_event);
+}
- cpu_pmu->name = "armv8_cortex_a73";
- cpu_pmu->map_event = armv8_a73_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
+static int armv8_a75_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a75",
+ armv8_pmuv3_map_event);
+}
- return 0;
+static int armv8_a76_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a76",
+ armv8_pmuv3_map_event);
+}
+
+static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a77",
+ armv8_pmuv3_map_event);
+}
+
+static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_e1",
+ armv8_pmuv3_map_event);
+}
+
+static int armv8_n1_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_n1",
+ armv8_pmuv3_map_event);
}
static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cavium_thunder";
- cpu_pmu->map_event = armv8_thunder_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder",
+ armv8_thunder_map_event);
}
static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_brcm_vulcan";
- cpu_pmu->map_event = armv8_vulcan_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan",
+ armv8_vulcan_map_event);
}
static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init},
+ {.compatible = "arm,cortex-a34-pmu", .data = armv8_a34_pmu_init},
{.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init},
{.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init},
+ {.compatible = "arm,cortex-a55-pmu", .data = armv8_a55_pmu_init},
{.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
+ {.compatible = "arm,cortex-a65-pmu", .data = armv8_a65_pmu_init},
{.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init},
{.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init},
+ {.compatible = "arm,cortex-a75-pmu", .data = armv8_a75_pmu_init},
+ {.compatible = "arm,cortex-a76-pmu", .data = armv8_a76_pmu_init},
+ {.compatible = "arm,cortex-a77-pmu", .data = armv8_a77_pmu_init},
+ {.compatible = "arm,neoverse-e1-pmu", .data = armv8_e1_pmu_init},
+ {.compatible = "arm,neoverse-n1-pmu", .data = armv8_n1_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},
{.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init},
{},
@@ -1203,28 +1258,54 @@
void arch_perf_update_userpage(struct perf_event *event,
struct perf_event_mmap_page *userpg, u64 now)
{
- u32 freq;
- u32 shift;
+ struct clock_read_data *rd;
+ unsigned int seq;
+ u64 ns;
- /*
- * Internal timekeeping for enabled/running/stopped times
- * is always computed with the sched_clock.
- */
- freq = arch_timer_get_rate();
- userpg->cap_user_time = 1;
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_time_short = 0;
- clocks_calc_mult_shift(&userpg->time_mult, &shift, freq,
- NSEC_PER_SEC, 0);
+ do {
+ rd = sched_clock_read_begin(&seq);
+
+ if (rd->read_sched_clock != arch_timer_read_counter)
+ return;
+
+ userpg->time_mult = rd->mult;
+ userpg->time_shift = rd->shift;
+ userpg->time_zero = rd->epoch_ns;
+ userpg->time_cycles = rd->epoch_cyc;
+ userpg->time_mask = rd->sched_clock_mask;
+
+ /*
+ * Subtract the cycle base, such that software that
+ * doesn't know about cap_user_time_short still 'works'
+ * assuming no wraps.
+ */
+ ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+ userpg->time_zero -= ns;
+
+ } while (sched_clock_read_retry(seq));
+
+ userpg->time_offset = userpg->time_zero - now;
+
/*
* time_shift is not expected to be greater than 31 due to
* the original published conversion algorithm shifting a
* 32-bit value (now specifies a 64-bit value) - refer
* perf_event_mmap_page documentation in perf_event.h.
*/
- if (shift == 32) {
- shift = 31;
+ if (userpg->time_shift == 32) {
+ userpg->time_shift = 31;
userpg->time_mult >>= 1;
}
- userpg->time_shift = (u16)shift;
- userpg->time_offset = -now;
+
+ /*
+ * Internal timekeeping for enabled/running/stopped times
+ * is always computed with the sched_clock.
+ */
+ userpg->cap_user_time = 1;
+ userpg->cap_user_time_zero = 1;
+ userpg->cap_user_time_short = 1;
}
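
On the consumer side, a userspace profiler with the event's mmap page can now convert raw counter values to sched_clock nanoseconds. A hedged sketch using the overflow-safe algorithm documented for these fields in include/uapi/linux/perf_event.h:

#include <linux/perf_event.h>

/* Convert a raw counter value via a mapped perf_event_mmap_page. */
static __u64 counter_to_ns(const struct perf_event_mmap_page *pg, __u64 cyc)
{
        __u64 quot, rem;

        /* Rebase and wrap the cycles, as cap_user_time_short requires. */
        cyc = pg->time_cycles + ((cyc - pg->time_cycles) & pg->time_mask);

        quot = cyc >> pg->time_shift;
        rem  = cyc & (((__u64)1 << pg->time_shift) - 1);
        return pg->time_zero + quot * pg->time_mult +
               ((rem * pg->time_mult) >> pg->time_shift);
}
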
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
index 666b225..f6f58e6 100644
--- a/arch/arm64/kernel/perf_regs.c
+++ b/arch/arm64/kernel/perf_regs.c
@@ -16,7 +16,7 @@
/*
* Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but
- * we're stuck with it for ABI compatability reasons.
+ * we're stuck with it for ABI compatibility reasons.
*
* For a 32-bit consumer inspecting a 32-bit task, then it will look at
* the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h).
@@ -73,8 +73,7 @@
}
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
index c507b58..adb955f 100644
--- a/arch/arm64/kernel/pointer_auth.c
+++ b/arch/arm64/kernel/pointer_auth.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/prctl.h>
#include <linux/random.h>
@@ -9,7 +10,7 @@
int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
{
- struct ptrauth_keys *keys = &tsk->thread.keys_user;
+ struct ptrauth_keys_user *keys = &tsk->thread.keys_user;
unsigned long addr_key_mask = PR_PAC_APIAKEY | PR_PAC_APIBKEY |
PR_PAC_APDAKEY | PR_PAC_APDBKEY;
unsigned long key_mask = addr_key_mask | PR_PAC_APGAKEY;
@@ -17,9 +18,11 @@
if (!system_supports_address_auth() && !system_supports_generic_auth())
return -EINVAL;
+ if (is_compat_thread(task_thread_info(tsk)))
+ return -EINVAL;
+
if (!arg) {
- ptrauth_keys_init(keys);
- ptrauth_keys_switch(keys);
+ ptrauth_keys_init_user(keys);
return 0;
}
@@ -41,7 +44,5 @@
if (arg & PR_PAC_APGAKEY)
get_random_bytes(&keys->apga, sizeof(keys->apga));
- ptrauth_keys_switch(keys);
-
return 0;
}
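
ptrauth_prctl_reset_keys() is reached through PR_PAC_RESET_KEYS; with the ptrauth_keys_switch() calls gone, the regenerated keys take effect when the task next returns to userspace. A userspace sketch regenerating only the instruction keys (constants from linux/prctl.h):

#include <sys/prctl.h>
#include <linux/prctl.h>

static int reset_insn_keys(void)
{
        /* A zero argument would instead reset every supported key. */
        return prctl(PR_PAC_RESET_KEYS, PR_PAC_APIAKEY | PR_PAC_APIBKEY,
                     0, 0, 0);
}
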
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index b78fac9..104101f 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -29,7 +29,8 @@
aarch64_insn_is_msr_imm(insn) ||
aarch64_insn_is_msr_reg(insn) ||
aarch64_insn_is_exception(insn) ||
- aarch64_insn_is_eret(insn))
+ aarch64_insn_is_eret(insn) ||
+ aarch64_insn_is_eret_auth(insn))
return false;
/*
@@ -42,11 +43,13 @@
!= AARCH64_INSN_SPCLREG_DAIF;
/*
- * The HINT instruction is is problematic when single-stepping,
- * except for the NOP case.
+ * A HINT instruction is steppable only if it is on the whitelist;
+ * all other HINT instructions are blocked from single stepping,
+ * as they may cause an exception or other unintended behaviour.
*/
if (aarch64_insn_is_hint(insn))
- return aarch64_insn_is_nop(insn);
+ return aarch64_insn_is_steppable_hint(insn);
return true;
}
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index c445282..798c3e7 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -36,25 +36,16 @@
static void __kprobes
post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
-static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
-{
- void *addrs[1];
- u32 insns[1];
-
- addrs[0] = addr;
- insns[0] = opcode;
-
- return aarch64_insn_patch_text(addrs, insns, 1);
-}
-
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
- /* prepare insn slot */
- patch_text(p->ainsn.api.insn, p->opcode);
+ kprobe_opcode_t *addr = p->ainsn.api.insn;
+ void *addrs[] = {addr, addr + 1};
+ u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS};
- flush_icache_range((uintptr_t) (p->ainsn.api.insn),
- (uintptr_t) (p->ainsn.api.insn) +
- MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ /* prepare insn slot */
+ aarch64_insn_patch_text(addrs, insns, 2);
+
+ flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE));
/*
* Needs restoring of return address after stepping xol.
@@ -120,27 +111,26 @@
void *alloc_insn_page(void)
{
- void *page;
-
- page = vmalloc_exec(PAGE_SIZE);
- if (page) {
- set_memory_ro((unsigned long)page, 1);
- set_vm_flush_reset_perms(page);
- }
-
- return page;
+ return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
+ GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
+ NUMA_NO_NODE, __builtin_return_address(0));
}
/* arm kprobe: install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
- patch_text(p->addr, BRK64_OPCODE_KPROBES);
+ void *addr = p->addr;
+ u32 insn = BRK64_OPCODE_KPROBES;
+
+ aarch64_insn_patch_text(&addr, &insn, 1);
}
/* disarm kprobe: remove breakpoint from text */
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
- patch_text(p->addr, p->opcode);
+ void *addr = p->addr;
+
+ aarch64_insn_patch_text(&addr, &p->opcode, 1);
}
void __kprobes arch_remove_kprobe(struct kprobe *p)
@@ -169,20 +159,15 @@
}
/*
- * Interrupts need to be disabled before single-step mode is set, and not
- * reenabled until after single-step mode ends.
- * Without disabling interrupt on local CPU, there is a chance of
- * interrupt occurrence in the period of exception return and start of
- * out-of-line single-step, that result in wrongly single stepping
- * into the interrupt handler.
+ * Mask all of DAIF while executing the instruction out-of-line, to keep things
+ * simple and avoid nesting exceptions. Interrupts do have to be disabled since
+ * the kprobe state is per-CPU and doesn't get migrated.
*/
static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
struct pt_regs *regs)
{
kcb->saved_irqflag = regs->pstate & DAIF_MASK;
- regs->pstate |= PSR_I_BIT;
- /* Unmask PSTATE.D for enabling software step exceptions. */
- regs->pstate &= ~PSR_D_BIT;
+ regs->pstate |= DAIF_MASK;
}
static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
@@ -225,10 +210,7 @@
slot = (unsigned long)p->ainsn.api.insn;
set_ss_context(kcb, slot); /* mark pending ss */
-
- /* IRQs and single stepping do not mix well. */
kprobes_save_local_irqflag(kcb, regs);
- kernel_enable_single_step(regs);
instruction_pointer_set(regs, slot);
} else {
/* insn simulation */
@@ -279,12 +261,8 @@
}
/* call post handler */
kcb->kprobe_status = KPROBE_HIT_SSDONE;
- if (cur->post_handler) {
- /* post_handler can hit breakpoint and single step
- * again, so we enable D-flag for recursive exception.
- */
+ if (cur->post_handler)
cur->post_handler(cur, regs, 0);
- }
reset_current_kprobe();
}
@@ -308,12 +286,12 @@
if (!instruction_pointer(regs))
BUG();
- kernel_disable_single_step();
-
- if (kcb->kprobe_status == KPROBE_REENTER)
+ if (kcb->kprobe_status == KPROBE_REENTER) {
restore_previous_kprobe(kcb);
- else
+ } else {
+ kprobes_restore_local_irqflag(kcb, regs);
reset_current_kprobe();
+ }
break;
case KPROBE_HIT_ACTIVE:
@@ -371,10 +349,6 @@
* pre-handler and it returned non-zero, it will
* modify the execution path and no need to single
* stepping. Let's just reset current kprobe and exit.
- *
- * pre_handler can hit a breakpoint and can step thru
- * before return, keep PSTATE D-flag enabled until
- * pre_handler return back.
*/
if (!p->pre_handler || !p->pre_handler(p, regs)) {
setup_singlestep(p, regs, kcb, 0);
@@ -405,7 +379,7 @@
}
static int __kprobes
-kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)
+kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
int retval;
@@ -415,16 +389,15 @@
if (retval == DBG_HOOK_HANDLED) {
kprobes_restore_local_irqflag(kcb, regs);
- kernel_disable_single_step();
-
post_kprobe_handler(kcb, regs);
}
return retval;
}
-static struct step_hook kprobes_step_hook = {
- .fn = kprobe_single_step_handler,
+static struct break_hook kprobes_break_ss_hook = {
+ .imm = KPROBES_BRK_SS_IMM,
+ .fn = kprobe_breakpoint_ss_handler,
};
static int __kprobes
@@ -455,10 +428,6 @@
(unsigned long)__irqentry_text_end);
if (ret)
return ret;
- ret = kprobe_add_area_blacklist((unsigned long)__exception_text_start,
- (unsigned long)__exception_text_end);
- if (ret)
- return ret;
ret = kprobe_add_area_blacklist((unsigned long)__idmap_text_start,
(unsigned long)__idmap_text_end);
if (ret)
@@ -474,87 +443,15 @@
void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head, empty_rp;
- struct hlist_node *tmp;
- unsigned long flags, orig_ret_address = 0;
- unsigned long trampoline_address =
- (unsigned long)&kretprobe_trampoline;
- kprobe_opcode_t *correct_ret_addr = NULL;
-
- INIT_HLIST_HEAD(&empty_rp);
- kretprobe_hash_lock(current, &head, &flags);
-
- /*
- * It is possible to have multiple instances associated with a given
- * task either because multiple functions in the call path have
- * return probes installed on them, and/or more than one
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always pushed into the head of the list
- * - when multiple return probes are registered for the same
- * function, the (chronologically) first instance's ret_addr
- * will be the real return address, and all the rest will
- * point to kretprobe_trampoline.
- */
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- orig_ret_address = (unsigned long)ri->ret_addr;
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
-
- correct_ret_addr = ri->ret_addr;
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- orig_ret_address = (unsigned long)ri->ret_addr;
- if (ri->rp && ri->rp->handler) {
- __this_cpu_write(current_kprobe, &ri->rp->kp);
- get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
- ri->ret_addr = correct_ret_addr;
- ri->rp->handler(ri, regs);
- __this_cpu_write(current_kprobe, NULL);
- }
-
- recycle_rp_inst(ri, &empty_rp);
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_hash_unlock(current, &flags);
-
- hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
- hlist_del(&ri->hlist);
- kfree(ri);
- }
- return (void *)orig_ret_address;
+ return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline,
+ (void *)kernel_stack_pointer(regs));
}
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
ri->ret_addr = (kprobe_opcode_t *)regs->regs[30];
+ ri->fp = (void *)kernel_stack_pointer(regs);
/* replace return addr (x30) with trampoline */
regs->regs[30] = (long)&kretprobe_trampoline;
@@ -568,7 +465,7 @@
int __init arch_init_kprobes(void)
{
register_kernel_break_hook(&kprobes_break_hook);
- register_kernel_step_hook(&kprobes_step_hook);
+ register_kernel_break_hook(&kprobes_break_ss_hook);
return 0;
}
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
index 45dce03..890ca72 100644
--- a/arch/arm64/kernel/probes/kprobes_trampoline.S
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -61,7 +61,7 @@
ldp x28, x29, [sp, #S_X28]
.endm
-ENTRY(kretprobe_trampoline)
+SYM_CODE_START(kretprobe_trampoline)
sub sp, sp, #S_FRAME_SIZE
save_all_base_regs
@@ -79,4 +79,4 @@
add sp, sp, #S_FRAME_SIZE
ret
-ENDPROC(kretprobe_trampoline)
+SYM_CODE_END(kretprobe_trampoline)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 7d7cfa1..22275d8 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -11,6 +11,7 @@
#include <linux/compat.h>
#include <linux/efi.h>
+#include <linux/elf.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
@@ -18,7 +19,9 @@
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/lockdep.h>
+#include <linux/mman.h>
#include <linux/mm.h>
+#include <linux/nospec.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/unistd.h>
@@ -50,13 +53,14 @@
#include <asm/exec.h>
#include <asm/fpsimd.h>
#include <asm/mmu_context.h>
+#include <asm/mte.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
#include <asm/stacktrace.h>
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
-unsigned long __stack_chk_guard __read_mostly;
+unsigned long __stack_chk_guard __ro_after_init;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
@@ -68,13 +72,13 @@
void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
-static void __cpu_do_idle(void)
+static void noinstr __cpu_do_idle(void)
{
dsb(sy);
wfi();
}
-static void __cpu_do_idle_irqprio(void)
+static void noinstr __cpu_do_idle_irqprio(void)
{
unsigned long pmr;
unsigned long daif_bits;
@@ -104,7 +108,7 @@
* ensure that interrupts are not masked at the PMR (because the core will
* not wake up if we block the wake up signal in the interrupt controller).
*/
-void cpu_do_idle(void)
+void noinstr cpu_do_idle(void)
{
if (system_uses_irq_prio_masking())
__cpu_do_idle_irqprio();
@@ -115,16 +119,14 @@
/*
* This is our default idle handler.
*/
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
{
/*
* This should do all the clock switching and wait for interrupt
* tricks
*/
- trace_cpu_idle_rcuidle(1, smp_processor_id());
cpu_do_idle();
- local_irq_enable();
- trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+ raw_local_irq_enable();
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -141,11 +143,11 @@
* to execute e.g. a RAM-based pin loop is not sufficient. This allows the
* kexec'd kernel to use any and all RAM as it sees fit, without having to
* avoid any code or data used by any SW CPU pin loop. The CPU hotplug
- * functionality embodied in disable_nonboot_cpus() to achieve this.
+ * functionality embodied in smp_shutdown_nonboot_cpus() is used to achieve this.
*/
void machine_shutdown(void)
{
- disable_nonboot_cpus();
+ smp_shutdown_nonboot_cpus(reboot_cpu);
}
/*
@@ -209,6 +211,15 @@
while (1);
}
+#define bstr(suffix, str) [PSR_BTYPE_ ## suffix >> PSR_BTYPE_SHIFT] = str
+static const char *const btypes[] = {
+ bstr(NONE, "--"),
+ bstr( JC, "jc"),
+ bstr( C, "-c"),
+ bstr( J , "j-")
+};
+#undef bstr
+
static void print_pstate(struct pt_regs *regs)
{
u64 pstate = regs->pstate;
@@ -227,7 +238,10 @@
pstate & PSR_AA32_I_BIT ? 'I' : 'i',
pstate & PSR_AA32_F_BIT ? 'F' : 'f');
} else {
- printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO)\n",
+ const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >>
+ PSR_BTYPE_SHIFT];
+
+ printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO BTYPE=%s)\n",
pstate,
pstate & PSR_N_BIT ? 'N' : 'n',
pstate & PSR_Z_BIT ? 'Z' : 'z',
@@ -238,7 +252,9 @@
pstate & PSR_I_BIT ? 'I' : 'i',
pstate & PSR_F_BIT ? 'F' : 'f',
pstate & PSR_PAN_BIT ? '+' : '-',
- pstate & PSR_UAO_BIT ? '+' : '-');
+ pstate & PSR_UAO_BIT ? '+' : '-',
+ pstate & PSR_TCO_BIT ? '+' : '-',
+ btype_str);
}
}
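
To make the table lookup concrete: each bstr() entry above expands to a designated initializer keyed by the two-bit BTYPE field, so the print path is a plain array index:

	/* bstr(JC, "jc")  =>  [PSR_BTYPE_JC >> PSR_BTYPE_SHIFT] = "jc" */
	const char *s = btypes[(pstate & PSR_BTYPE_MASK) >> PSR_BTYPE_SHIFT];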
@@ -262,7 +278,7 @@
if (!user_mode(regs)) {
printk("pc : %pS\n", (void *)regs->pc);
- printk("lr : %pS\n", (void *)lr);
+ printk("lr : %pS\n", (void *)ptrauth_strip_insn_pac(lr));
} else {
printk("pc : %016llx\n", regs->pc);
printk("lr : %016llx\n", lr);
@@ -291,7 +307,7 @@
void show_regs(struct pt_regs * regs)
{
__show_regs(regs);
- dump_backtrace(regs, NULL);
+ dump_backtrace(regs, NULL, KERN_DEFAULT);
}
static void tls_thread_flush(void)
@@ -323,6 +339,7 @@
tls_thread_flush();
flush_ptrace_hw_breakpoint(current);
flush_tagged_addr_state();
+ flush_mte_state();
}
void release_thread(struct task_struct *dead_task)
@@ -355,12 +372,15 @@
dst->thread.sve_state = NULL;
clear_tsk_thread_flag(dst, TIF_SVE);
+ /* clear any pending asynchronous tag fault raised by the parent */
+ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
+
return 0;
}
asmlinkage void ret_from_fork(void) asm("ret_from_fork");
-int copy_thread_tls(unsigned long clone_flags, unsigned long stack_start,
+int copy_thread(unsigned long clone_flags, unsigned long stack_start,
unsigned long stk_sz, struct task_struct *p, unsigned long tls)
{
struct pt_regs *childregs = task_pt_regs(p);
@@ -376,6 +396,8 @@
*/
fpsimd_flush_task_state(p);
+ ptrauth_thread_init_kernel(p);
+
if (likely(!(p->flags & PF_KTHREAD))) {
*childregs = *current_pt_regs();
childregs->regs[0] = 0;
@@ -406,8 +428,7 @@
cpus_have_const_cap(ARM64_HAS_UAO))
childregs->pstate |= PSR_UAO_BIT;
- if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
- set_ssbs_bit(childregs);
+ spectre_v4_enable_task_mitigation(p);
if (system_uses_irq_prio_masking())
childregs->pmr_save = GIC_PRIO_IRQON;
@@ -457,8 +478,6 @@
*/
static void ssbs_thread_switch(struct task_struct *next)
{
- struct pt_regs *regs = task_pt_regs(next);
-
/*
* Nothing to do for kernel threads, but 'regs' may be junk
* (e.g. idle task) so check the flags and bail early.
@@ -470,18 +489,10 @@
* If all CPUs implement the SSBS extension, then we just need to
* context-switch the PSTATE field.
*/
- if (cpu_have_feature(cpu_feature(SSBS)))
+ if (cpus_have_const_cap(ARM64_SSBS))
return;
- /* If the mitigation is enabled, then we leave SSBS clear. */
- if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
- test_tsk_thread_flag(next, TIF_SSBD))
- return;
-
- if (compat_user_mode(regs))
- set_compat_ssbs_bit(regs);
- else if (user_mode(regs))
- set_ssbs_bit(regs);
+ spectre_v4_enable_task_mitigation(next);
}
/*
@@ -500,34 +511,26 @@
/*
* ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
- * Assuming the virtual counter is enabled at the beginning of times:
- *
- * - disable access when switching from a 64bit task to a 32bit task
- * - enable access when switching from a 32bit task to a 64bit task
+ * Ensure access is disabled when switching to a 32bit task, ensure
+ * access is enabled when switching to a 64bit task.
*/
-static void erratum_1418040_thread_switch(struct task_struct *prev,
- struct task_struct *next)
+static void erratum_1418040_thread_switch(struct task_struct *next)
{
- bool prev32, next32;
- u64 val;
-
- if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040))
+ if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
+ !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
return;
- prev32 = is_compat_thread(task_thread_info(prev));
- next32 = is_compat_thread(task_thread_info(next));
-
- if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
- return;
-
- val = read_sysreg(cntkctl_el1);
-
- if (!next32)
- val |= ARCH_TIMER_USR_VCT_ACCESS_EN;
+ if (is_compat_thread(task_thread_info(next)))
+ sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
else
- val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
+ sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
+}
- write_sysreg(val, cntkctl_el1);
+static void erratum_1418040_new_exec(void)
+{
+ preempt_disable();
+ erratum_1418040_thread_switch(current);
+ preempt_enable();
}
/*
@@ -544,9 +547,8 @@
contextidr_thread_switch(next);
entry_task_switch(next);
uao_thread_switch(next);
- ptrauth_thread_switch(next);
ssbs_thread_switch(next);
- erratum_1418040_thread_switch(prev, next);
+ erratum_1418040_thread_switch(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
@@ -556,6 +558,13 @@
*/
dsb(ish);
+ /*
+ * MTE thread switching must happen after the DSB above to ensure that
+ * any asynchronous tag check faults have been logged in the TFSR*_EL1
+ * registers.
+ */
+ mte_thread_switch(next);
+
/* the actual thread switch */
last = cpu_switch_to(prev, next);
@@ -605,6 +614,12 @@
current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
ptrauth_thread_init_user(current);
+ erratum_1418040_new_exec();
+
+ if (task_spec_ssb_noexec(current)) {
+ arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
+ PR_SPEC_ENABLE);
+ }
}
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
@@ -613,11 +628,18 @@
*/
static unsigned int tagged_addr_disabled;
-long set_tagged_addr_ctrl(unsigned long arg)
+long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
{
- if (is_compat_task())
+ unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
+ struct thread_info *ti = task_thread_info(task);
+
+ if (is_compat_thread(ti))
return -EINVAL;
- if (arg & ~PR_TAGGED_ADDR_ENABLE)
+
+ if (system_supports_mte())
+ valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK;
+
+ if (arg & ~valid_mask)
return -EINVAL;
/*
@@ -627,20 +649,28 @@
if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
return -EINVAL;
- update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
+ if (set_mte_ctrl(task, arg) != 0)
+ return -EINVAL;
+
+ update_ti_thread_flag(ti, TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
return 0;
}
-long get_tagged_addr_ctrl(void)
+long get_tagged_addr_ctrl(struct task_struct *task)
{
- if (is_compat_task())
+ long ret = 0;
+ struct thread_info *ti = task_thread_info(task);
+
+ if (is_compat_thread(ti))
return -EINVAL;
- if (test_thread_flag(TIF_TAGGED_ADDR))
- return PR_TAGGED_ADDR_ENABLE;
+ if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR))
+ ret = PR_TAGGED_ADDR_ENABLE;
- return 0;
+ ret |= get_mte_ctrl(task);
+
+ return ret;
}
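
These hooks back the tagged-address prctl(2) interface; a hedged userspace sketch (PR_MTE_TCF_SYNC takes effect only on an MTE-capable kernel and needs recent headers):

	#include <sys/prctl.h>

	/* Opt this thread into the tagged address ABI; with MTE the same
	 * control word also carries the tag-check mode and tag mask. */
	if (prctl(PR_SET_TAGGED_ADDR_CTRL,
		  PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC, 0, 0, 0))
		perror("PR_SET_TAGGED_ADDR_CTRL");

	long ctrl = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);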
/*
@@ -648,8 +678,6 @@
* only prevents the tagged address ABI enabling via prctl() and does not
* disable it for tasks that already opted in to the relaxed ABI.
*/
-static int zero;
-static int one = 1;
static struct ctl_table tagged_addr_sysctl_table[] = {
{
@@ -658,8 +686,8 @@
.data = &tagged_addr_disabled,
.maxlen = sizeof(int),
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{ }
};
@@ -686,6 +714,28 @@
* Only allow a task to be preempted once cpufeatures have been
* enabled.
*/
- if (static_branch_likely(&arm64_const_caps_ready))
+ if (system_capabilities_finalized())
preempt_schedule_irq();
}
+
+#ifdef CONFIG_BINFMT_ELF
+int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
+ bool has_interp, bool is_interp)
+{
+ /*
+ * For dynamically linked executables the interpreter is
+ * responsible for setting PROT_BTI on everything except
+ * itself.
+ */
+ if (is_interp != has_interp)
+ return prot;
+
+ if (!(state->flags & ARM64_ELF_BTI))
+ return prot;
+
+ if (prot & PROT_EXEC)
+ prot |= PROT_BTI;
+
+ return prot;
+}
+#endif
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
new file mode 100644
index 0000000..3dd489b
--- /dev/null
+++ b/arch/arm64/kernel/proton-pack.c
@@ -0,0 +1,1136 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Handle detection, reporting and mitigation of Spectre v1, v2 and v4, as
+ * detailed at:
+ *
+ * https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability
+ *
+ * This code was originally written hastily under an awful lot of stress and so
+ * aspects of it are somewhat hacky. Unfortunately, changing anything in here
+ * instantly makes me feel ill. Thanks, Jann. Thann.
+ *
+ * Copyright (C) 2018 ARM Ltd, All Rights Reserved.
+ * Copyright (C) 2020 Google LLC
+ *
+ * "If there's something strange in your neighbourhood, who you gonna call?"
+ *
+ * Authors: Will Deacon <will@kernel.org> and Marc Zyngier <maz@kernel.org>
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/bpf.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/insn.h>
+#include <asm/spectre.h>
+#include <asm/traps.h>
+#include <asm/vectors.h>
+#include <asm/virt.h>
+
+/*
+ * We try to ensure that the mitigation state can never change as the result of
+ * onlining a late CPU.
+ */
+static void update_mitigation_state(enum mitigation_state *oldp,
+ enum mitigation_state new)
+{
+ enum mitigation_state state;
+
+ do {
+ state = READ_ONCE(*oldp);
+ if (new <= state)
+ break;
+
+ /* Userspace almost certainly can't deal with this. */
+ if (WARN_ON(system_capabilities_finalized()))
+ break;
+ } while (cmpxchg_relaxed(oldp, state, new) != state);
+}
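
One detail worth spelling out (assuming the enum order declared in <asm/spectre.h>):

	/*
	 * SPECTRE_UNAFFECTED < SPECTRE_MITIGATED < SPECTRE_VULNERABLE, so
	 * the "new <= state" check makes the state monotonic: a late CPU
	 * can only make the reported state worse, never better, and the
	 * WARN_ON fires if that happens after userspace has already been
	 * told otherwise.
	 */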
+
+/*
+ * Spectre v1.
+ *
+ * The kernel can't protect userspace for this one: it's each person for
+ * themselves. Advertise what we're doing and be done with it.
+ */
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+/*
+ * Spectre v2.
+ *
+ * This one sucks. A CPU is either:
+ *
+ * - Mitigated in hardware and advertised by ID_AA64PFR0_EL1.CSV2.
+ * - Mitigated in hardware and listed in our "safe list".
+ * - Mitigated in software by firmware.
+ * - Mitigated in software by a CPU-specific dance in the kernel and a
+ * firmware call at EL2.
+ * - Vulnerable.
+ *
+ * It's not unlikely for different CPUs in a big.LITTLE system to fall into
+ * different camps.
+ */
+static enum mitigation_state spectre_v2_state;
+
+static bool __read_mostly __nospectre_v2;
+static int __init parse_spectre_v2_param(char *str)
+{
+ __nospectre_v2 = true;
+ return 0;
+}
+early_param("nospectre_v2", parse_spectre_v2_param);
+
+static bool spectre_v2_mitigations_off(void)
+{
+ bool ret = __nospectre_v2 || cpu_mitigations_off();
+
+ if (ret)
+ pr_info_once("spectre-v2 mitigation disabled by command line option\n");
+
+ return ret;
+}
+
+static const char *get_bhb_affected_string(enum mitigation_state bhb_state)
+{
+ switch (bhb_state) {
+ case SPECTRE_UNAFFECTED:
+ return "";
+ default:
+ case SPECTRE_VULNERABLE:
+ return ", but not BHB";
+ case SPECTRE_MITIGATED:
+ return ", BHB";
+ }
+}
+
+static bool _unprivileged_ebpf_enabled(void)
+{
+#ifdef CONFIG_BPF_SYSCALL
+ return !sysctl_unprivileged_bpf_disabled;
+#else
+ return false;
+#endif
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ enum mitigation_state bhb_state = arm64_get_spectre_bhb_state();
+ const char *bhb_str = get_bhb_affected_string(bhb_state);
+ const char *v2_str = "Branch predictor hardening";
+
+ switch (spectre_v2_state) {
+ case SPECTRE_UNAFFECTED:
+ if (bhb_state == SPECTRE_UNAFFECTED)
+ return sprintf(buf, "Not affected\n");
+
+ /*
+ * Platforms affected by Spectre-BHB can't report
+ * "Not affected" for Spectre-v2.
+ */
+ v2_str = "CSV2";
+ fallthrough;
+ case SPECTRE_MITIGATED:
+ if (bhb_state == SPECTRE_MITIGATED && _unprivileged_ebpf_enabled())
+ return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n");
+
+ return sprintf(buf, "Mitigation: %s%s\n", v2_str, bhb_str);
+ case SPECTRE_VULNERABLE:
+ fallthrough;
+ default:
+ return sprintf(buf, "Vulnerable\n");
+ }
+}
+
+static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void)
+{
+ u64 pfr0;
+ static const struct midr_range spectre_v2_safe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+ MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
+ { /* sentinel */ }
+ };
+
+ /* If the CPU has CSV2 set, we're safe */
+ pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+ if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT))
+ return SPECTRE_UNAFFECTED;
+
+ /* Alternatively, we have a list of unaffected CPUs */
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list))
+ return SPECTRE_UNAFFECTED;
+
+ return SPECTRE_VULNERABLE;
+}
+
+static enum mitigation_state spectre_v2_get_cpu_fw_mitigation_state(void)
+{
+ int ret;
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+
+ ret = res.a0;
+ switch (ret) {
+ case SMCCC_RET_SUCCESS:
+ return SPECTRE_MITIGATED;
+ case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
+ return SPECTRE_UNAFFECTED;
+ default:
+ fallthrough;
+ case SMCCC_RET_NOT_SUPPORTED:
+ return SPECTRE_VULNERABLE;
+ }
+}
+
+bool has_spectre_v2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+ if (spectre_v2_get_cpu_hw_mitigation_state() == SPECTRE_UNAFFECTED)
+ return false;
+
+ if (spectre_v2_get_cpu_fw_mitigation_state() == SPECTRE_UNAFFECTED)
+ return false;
+
+ return true;
+}
+
+DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+enum mitigation_state arm64_get_spectre_v2_state(void)
+{
+ return spectre_v2_state;
+}
+
+#ifdef CONFIG_KVM
+#include <asm/cacheflush.h>
+#include <asm/kvm_asm.h>
+
+atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
+
+static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
+ const char *hyp_vecs_end)
+{
+ void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K);
+ int i;
+
+ for (i = 0; i < SZ_2K; i += 0x80)
+ memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
+
+ __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
+}
+
+static DEFINE_RAW_SPINLOCK(bp_lock);
+static void install_bp_hardening_cb(bp_hardening_cb_t fn)
+{
+ int cpu, slot = -1;
+ const char *hyp_vecs_start = __smccc_workaround_1_smc;
+ const char *hyp_vecs_end = __smccc_workaround_1_smc +
+ __SMCCC_WORKAROUND_1_SMC_SZ;
+
+ /*
+ * Vinz Clortho takes the hyp_vecs start/end "keys" at
+ * the door when we're a guest. Skip the hyp-vectors work.
+ */
+ if (!is_hyp_mode_available()) {
+ __this_cpu_write(bp_hardening_data.fn, fn);
+ return;
+ }
+
+ raw_spin_lock(&bp_lock);
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
+ slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
+ break;
+ }
+ }
+
+ if (slot == -1) {
+ slot = atomic_inc_return(&arm64_el2_vector_last_slot);
+ BUG_ON(slot >= BP_HARDEN_EL2_SLOTS);
+ __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
+ }
+
+ __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
+ __this_cpu_write(bp_hardening_data.fn, fn);
+ __this_cpu_write(bp_hardening_data.template_start, hyp_vecs_start);
+ raw_spin_unlock(&bp_lock);
+}
+#else
+static void install_bp_hardening_cb(bp_hardening_cb_t fn)
+{
+ __this_cpu_write(bp_hardening_data.fn, fn);
+}
+#endif /* CONFIG_KVM */
+
+static void call_smc_arch_workaround_1(void)
+{
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
+static void call_hvc_arch_workaround_1(void)
+{
+ arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
+static void qcom_link_stack_sanitisation(void)
+{
+ u64 tmp;
+
+ asm volatile("mov %0, x30 \n"
+ ".rept 16 \n"
+ "bl . + 4 \n"
+ ".endr \n"
+ "mov x30, %0 \n"
+ : "=&r" (tmp));
+}
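
What the inline asm is doing, spelled out:

	/*
	 * Each "bl . + 4" is a branch-and-link to the very next instruction:
	 * control flow is unchanged, but an entry is pushed onto the CPU's
	 * return-address (link stack) predictor. Sixteen of those overwrite
	 * the predictor state, and x30 is restored from the temporary
	 * afterwards. Only installed on Falkor parts (see below).
	 */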
+
+static bp_hardening_cb_t spectre_v2_get_sw_mitigation_cb(void)
+{
+ u32 midr = read_cpuid_id();
+ if (((midr & MIDR_CPU_MODEL_MASK) != MIDR_QCOM_FALKOR) &&
+ ((midr & MIDR_CPU_MODEL_MASK) != MIDR_QCOM_FALKOR_V1))
+ return NULL;
+
+ return qcom_link_stack_sanitisation;
+}
+
+static enum mitigation_state spectre_v2_enable_fw_mitigation(void)
+{
+ bp_hardening_cb_t cb;
+ enum mitigation_state state;
+
+ state = spectre_v2_get_cpu_fw_mitigation_state();
+ if (state != SPECTRE_MITIGATED)
+ return state;
+
+ if (spectre_v2_mitigations_off())
+ return SPECTRE_VULNERABLE;
+
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
+ cb = call_hvc_arch_workaround_1;
+ break;
+
+ case SMCCC_CONDUIT_SMC:
+ cb = call_smc_arch_workaround_1;
+ break;
+
+ default:
+ return SPECTRE_VULNERABLE;
+ }
+
+ /*
+ * Prefer a CPU-specific workaround if it exists. Note that we
+ * still rely on firmware for the mitigation at EL2.
+ */
+ cb = spectre_v2_get_sw_mitigation_cb() ?: cb;
+ install_bp_hardening_cb(cb);
+ return SPECTRE_MITIGATED;
+}
+
+void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
+{
+ enum mitigation_state state;
+
+ WARN_ON(preemptible());
+
+ state = spectre_v2_get_cpu_hw_mitigation_state();
+ if (state == SPECTRE_VULNERABLE)
+ state = spectre_v2_enable_fw_mitigation();
+
+ update_mitigation_state(&spectre_v2_state, state);
+}
+
+/*
+ * Spectre v4.
+ *
+ * If you thought Spectre v2 was nasty, wait until you see this mess. A CPU is
+ * either:
+ *
+ * - Mitigated in hardware and listed in our "safe list".
+ * - Mitigated in hardware via PSTATE.SSBS.
+ * - Mitigated in software by firmware (sometimes referred to as SSBD).
+ *
+ * Wait, that doesn't sound so bad, does it? Keep reading...
+ *
+ * A major source of headaches is that the software mitigation is enabled
+ * on a per-task basis, but can also be forced on for the kernel, necessitating
+ * both context-switch *and* entry/exit hooks. To make it even worse, some CPUs
+ * allow EL0 to toggle SSBS directly, which can end up with the prctl() state
+ * being stale when re-entering the kernel. The usual big.LITTLE caveats apply,
+ * so you can have systems that have both firmware and SSBS mitigations. This
+ * means we actually have to reject late onlining of CPUs with mitigations if
+ * all of the currently onlined CPUs are safelisted, as the mitigation tends to
+ * be opt-in for userspace. Yes, really, the cure is worse than the disease.
+ *
+ * The only good part is that if the firmware mitigation is present, then it is
+ * present for all CPUs, meaning we don't have to worry about late onlining of a
+ * vulnerable CPU if one of the boot CPUs is using the firmware mitigation.
+ *
+ * Give me a VAX-11/780 any day of the week...
+ */
+static enum mitigation_state spectre_v4_state;
+
+/* This is the per-cpu state tracking whether we need to talk to firmware */
+DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
+
+enum spectre_v4_policy {
+ SPECTRE_V4_POLICY_MITIGATION_DYNAMIC,
+ SPECTRE_V4_POLICY_MITIGATION_ENABLED,
+ SPECTRE_V4_POLICY_MITIGATION_DISABLED,
+};
+
+static enum spectre_v4_policy __read_mostly __spectre_v4_policy;
+
+static const struct spectre_v4_param {
+ const char *str;
+ enum spectre_v4_policy policy;
+} spectre_v4_params[] = {
+ { "force-on", SPECTRE_V4_POLICY_MITIGATION_ENABLED, },
+ { "force-off", SPECTRE_V4_POLICY_MITIGATION_DISABLED, },
+ { "kernel", SPECTRE_V4_POLICY_MITIGATION_DYNAMIC, },
+};
+static int __init parse_spectre_v4_param(char *str)
+{
+ int i;
+
+ if (!str || !str[0])
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(spectre_v4_params); i++) {
+ const struct spectre_v4_param *param = &spectre_v4_params[i];
+
+ if (strncmp(str, param->str, strlen(param->str)))
+ continue;
+
+ __spectre_v4_policy = param->policy;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+early_param("ssbd", parse_spectre_v4_param);
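
The accepted command-line spellings, matched by prefix via strncmp():

	ssbd=force-on   -> SPECTRE_V4_POLICY_MITIGATION_ENABLED  ("on")
	ssbd=force-off  -> SPECTRE_V4_POLICY_MITIGATION_DISABLED ("off")
	ssbd=kernel     -> SPECTRE_V4_POLICY_MITIGATION_DYNAMIC  ("dynamic")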
+
+/*
+ * Because this was all written in a rush by people working in different silos,
+ * we've ended up with multiple command line options to control the same thing.
+ * Wrap these up in some helpers, which prefer disabling the mitigation if faced
+ * with contradictory parameters. The mitigation is always either "off",
+ * "dynamic" or "on".
+ */
+static bool spectre_v4_mitigations_off(void)
+{
+ bool ret = cpu_mitigations_off() ||
+ __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED;
+
+ if (ret)
+ pr_info_once("spectre-v4 mitigation disabled by command-line option\n");
+
+ return ret;
+}
+
+/* Do we need to toggle the mitigation state on entry to/exit from the kernel? */
+static bool spectre_v4_mitigations_dynamic(void)
+{
+ return !spectre_v4_mitigations_off() &&
+ __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DYNAMIC;
+}
+
+static bool spectre_v4_mitigations_on(void)
+{
+ return !spectre_v4_mitigations_off() &&
+ __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_ENABLED;
+}
+
+ssize_t cpu_show_spec_store_bypass(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ switch (spectre_v4_state) {
+ case SPECTRE_UNAFFECTED:
+ return sprintf(buf, "Not affected\n");
+ case SPECTRE_MITIGATED:
+ return sprintf(buf, "Mitigation: Speculative Store Bypass disabled via prctl\n");
+ case SPECTRE_VULNERABLE:
+ fallthrough;
+ default:
+ return sprintf(buf, "Vulnerable\n");
+ }
+}
+
+enum mitigation_state arm64_get_spectre_v4_state(void)
+{
+ return spectre_v4_state;
+}
+
+static enum mitigation_state spectre_v4_get_cpu_hw_mitigation_state(void)
+{
+ static const struct midr_range spectre_v4_safe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
+ { /* sentinel */ },
+ };
+
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_v4_safe_list))
+ return SPECTRE_UNAFFECTED;
+
+ /* CPU features are detected first */
+ if (this_cpu_has_cap(ARM64_SSBS))
+ return SPECTRE_MITIGATED;
+
+ return SPECTRE_VULNERABLE;
+}
+
+static enum mitigation_state spectre_v4_get_cpu_fw_mitigation_state(void)
+{
+ int ret;
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_2, &res);
+
+ ret = res.a0;
+ switch (ret) {
+ case SMCCC_RET_SUCCESS:
+ return SPECTRE_MITIGATED;
+ case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
+ fallthrough;
+ case SMCCC_RET_NOT_REQUIRED:
+ return SPECTRE_UNAFFECTED;
+ default:
+ fallthrough;
+ case SMCCC_RET_NOT_SUPPORTED:
+ return SPECTRE_VULNERABLE;
+ }
+}
+
+bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope)
+{
+ enum mitigation_state state;
+
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+ state = spectre_v4_get_cpu_hw_mitigation_state();
+ if (state == SPECTRE_VULNERABLE)
+ state = spectre_v4_get_cpu_fw_mitigation_state();
+
+ return state != SPECTRE_UNAFFECTED;
+}
+
+static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr)
+{
+ if (user_mode(regs))
+ return 1;
+
+ if (instr & BIT(PSTATE_Imm_shift))
+ regs->pstate |= PSR_SSBS_BIT;
+ else
+ regs->pstate &= ~PSR_SSBS_BIT;
+
+ arm64_skip_faulting_instruction(regs, 4);
+ return 0;
+}
+
+static struct undef_hook ssbs_emulation_hook = {
+ .instr_mask = ~(1U << PSTATE_Imm_shift),
+ .instr_val = 0xd500401f | PSTATE_SSBS,
+ .fn = ssbs_emulation_handler,
+};
+
+static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
+{
+ static bool undef_hook_registered = false;
+ static DEFINE_RAW_SPINLOCK(hook_lock);
+ enum mitigation_state state;
+
+ /*
+ * If the system is mitigated but this CPU doesn't have SSBS, then
+ * we must be on the safelist and there's nothing more to do.
+ */
+ state = spectre_v4_get_cpu_hw_mitigation_state();
+ if (state != SPECTRE_MITIGATED || !this_cpu_has_cap(ARM64_SSBS))
+ return state;
+
+ raw_spin_lock(&hook_lock);
+ if (!undef_hook_registered) {
+ register_undef_hook(&ssbs_emulation_hook);
+ undef_hook_registered = true;
+ }
+ raw_spin_unlock(&hook_lock);
+
+ if (spectre_v4_mitigations_off()) {
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
+ asm volatile(SET_PSTATE_SSBS(1));
+ return SPECTRE_VULNERABLE;
+ }
+
+ /* SCTLR_EL1.DSSBS was initialised to 0 during boot */
+ asm volatile(SET_PSTATE_SSBS(0));
+ return SPECTRE_MITIGATED;
+}
+
+/*
+ * Patch a branch over the Spectre-v4 mitigation code with a NOP so that
+ * we fall through and check whether firmware needs to be called on this CPU.
+ */
+void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt,
+ __le32 *origptr,
+ __le32 *updptr, int nr_inst)
+{
+ BUG_ON(nr_inst != 1); /* Branch -> NOP */
+
+ if (spectre_v4_mitigations_off())
+ return;
+
+ if (cpus_have_final_cap(ARM64_SSBS))
+ return;
+
+ if (spectre_v4_mitigations_dynamic())
+ *updptr = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+/*
+ * Patch a NOP in the Spectre-v4 mitigation code with an SMC/HVC instruction
+ * to call into firmware to adjust the mitigation state.
+ */
+void __init smccc_patch_fw_mitigation_conduit(struct alt_instr *alt,
+ __le32 *origptr,
+ __le32 *updptr, int nr_inst)
+{
+ u32 insn;
+
+ BUG_ON(nr_inst != 1); /* NOP -> HVC/SMC */
+
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
+ insn = aarch64_insn_get_hvc_value();
+ break;
+ case SMCCC_CONDUIT_SMC:
+ insn = aarch64_insn_get_smc_value();
+ break;
+ default:
+ return;
+ }
+
+ *updptr = cpu_to_le32(insn);
+}
+
+static enum mitigation_state spectre_v4_enable_fw_mitigation(void)
+{
+ enum mitigation_state state;
+
+ state = spectre_v4_get_cpu_fw_mitigation_state();
+ if (state != SPECTRE_MITIGATED)
+ return state;
+
+ if (spectre_v4_mitigations_off()) {
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, false, NULL);
+ return SPECTRE_VULNERABLE;
+ }
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, true, NULL);
+
+ if (spectre_v4_mitigations_dynamic())
+ __this_cpu_write(arm64_ssbd_callback_required, 1);
+
+ return SPECTRE_MITIGATED;
+}
+
+void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
+{
+ enum mitigation_state state;
+
+ WARN_ON(preemptible());
+
+ state = spectre_v4_enable_hw_mitigation();
+ if (state == SPECTRE_VULNERABLE)
+ state = spectre_v4_enable_fw_mitigation();
+
+ update_mitigation_state(&spectre_v4_state, state);
+}
+
+static void __update_pstate_ssbs(struct pt_regs *regs, bool state)
+{
+ u64 bit = compat_user_mode(regs) ? PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
+
+ if (state)
+ regs->pstate |= bit;
+ else
+ regs->pstate &= ~bit;
+}
+
+void spectre_v4_enable_task_mitigation(struct task_struct *tsk)
+{
+ struct pt_regs *regs = task_pt_regs(tsk);
+ bool ssbs = false, kthread = tsk->flags & PF_KTHREAD;
+
+ if (spectre_v4_mitigations_off())
+ ssbs = true;
+ else if (spectre_v4_mitigations_dynamic() && !kthread)
+ ssbs = !test_tsk_thread_flag(tsk, TIF_SSBD);
+
+ __update_pstate_ssbs(regs, ssbs);
+}
+
+/*
+ * The Spectre-v4 mitigation can be controlled via a prctl() from userspace.
+ * This is interesting because the "speculation disabled" behaviour can be
+ * configured so that it is preserved across exec(), which means that the
+ * prctl() may be necessary even when PSTATE.SSBS can be toggled directly
+ * from userspace.
+ */
+static void ssbd_prctl_enable_mitigation(struct task_struct *task)
+{
+ task_clear_spec_ssb_noexec(task);
+ task_set_spec_ssb_disable(task);
+ set_tsk_thread_flag(task, TIF_SSBD);
+}
+
+static void ssbd_prctl_disable_mitigation(struct task_struct *task)
+{
+ task_clear_spec_ssb_noexec(task);
+ task_clear_spec_ssb_disable(task);
+ clear_tsk_thread_flag(task, TIF_SSBD);
+}
+
+static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ /* Enable speculation: disable mitigation */
+ /*
+ * Force disabled speculation prevents it from being
+ * re-enabled.
+ */
+ if (task_spec_ssb_force_disable(task))
+ return -EPERM;
+
+ /*
+ * If the mitigation is forced on, then speculation is forced
+ * off and we again prevent it from being re-enabled.
+ */
+ if (spectre_v4_mitigations_on())
+ return -EPERM;
+
+ ssbd_prctl_disable_mitigation(task);
+ break;
+ case PR_SPEC_FORCE_DISABLE:
+ /* Force disable speculation: force enable mitigation */
+ /*
+ * If the mitigation is forced off, then speculation is forced
+ * on and we prevent it from being disabled.
+ */
+ if (spectre_v4_mitigations_off())
+ return -EPERM;
+
+ task_set_spec_ssb_force_disable(task);
+ fallthrough;
+ case PR_SPEC_DISABLE:
+ /* Disable speculation: enable mitigation */
+ /* Same as PR_SPEC_FORCE_DISABLE */
+ if (spectre_v4_mitigations_off())
+ return -EPERM;
+
+ ssbd_prctl_enable_mitigation(task);
+ break;
+ case PR_SPEC_DISABLE_NOEXEC:
+ /* Disable speculation until execve(): enable mitigation */
+ /*
+ * If the mitigation state is forced one way or the other, then
+ * we must fail now before we try to toggle it on execve().
+ */
+ if (task_spec_ssb_force_disable(task) ||
+ spectre_v4_mitigations_off() ||
+ spectre_v4_mitigations_on()) {
+ return -EPERM;
+ }
+
+ ssbd_prctl_enable_mitigation(task);
+ task_set_spec_ssb_noexec(task);
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ spectre_v4_enable_task_mitigation(task);
+ return 0;
+}
+
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+ unsigned long ctrl)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssbd_prctl_set(task, ctrl);
+ default:
+ return -ENODEV;
+ }
+}
+
+static int ssbd_prctl_get(struct task_struct *task)
+{
+ switch (spectre_v4_state) {
+ case SPECTRE_UNAFFECTED:
+ return PR_SPEC_NOT_AFFECTED;
+ case SPECTRE_MITIGATED:
+ if (spectre_v4_mitigations_on())
+ return PR_SPEC_NOT_AFFECTED;
+
+ if (spectre_v4_mitigations_dynamic())
+ break;
+
+ /* Mitigations are disabled, so we're vulnerable. */
+ fallthrough;
+ case SPECTRE_VULNERABLE:
+ fallthrough;
+ default:
+ return PR_SPEC_ENABLE;
+ }
+
+ /* Check the mitigation state for this task */
+ if (task_spec_ssb_force_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+
+ if (task_spec_ssb_noexec(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE_NOEXEC;
+
+ if (task_spec_ssb_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssbd_prctl_get(task);
+ default:
+ return -ENODEV;
+ }
+}
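
A hedged userspace sketch of driving these hooks through the generic speculation-control prctl(2):

	#include <sys/prctl.h>

	/* Disable speculative store bypass (i.e. enable the mitigation)
	 * for the calling task. */
	if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
		  PR_SPEC_DISABLE, 0, 0))
		perror("PR_SET_SPECULATION_CTRL");

	/* Returns PR_SPEC_* flags, e.g. PR_SPEC_PRCTL | PR_SPEC_DISABLE. */
	long state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);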
+
+/*
+ * Spectre BHB.
+ *
+ * A CPU is either:
+ * - Mitigated by a branchy loop run a CPU-specific number of times, and listed
+ * in our "loop mitigated list".
+ * - Mitigated in software by the firmware Spectre v2 call.
+ * - Has the ClearBHB instruction to perform the mitigation.
+ * - Has the 'Exception Clears Branch History Buffer' (ECBHB) feature, so no
+ * software mitigation in the vectors is needed.
+ * - Has CSV2.3, so is unaffected.
+ */
+static enum mitigation_state spectre_bhb_state;
+
+enum mitigation_state arm64_get_spectre_bhb_state(void)
+{
+ return spectre_bhb_state;
+}
+
+/*
+ * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any
+ * SCOPE_SYSTEM call will give the right answer.
+ */
+u8 spectre_bhb_loop_affected(int scope)
+{
+ u8 k = 0;
+ static u8 max_bhb_k;
+
+ if (scope == SCOPE_LOCAL_CPU) {
+ static const struct midr_range spectre_bhb_k32_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k24_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k8_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ {},
+ };
+
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
+ k = 32;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
+ k = 24;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
+ k = 8;
+
+ max_bhb_k = max(max_bhb_k, k);
+ } else {
+ k = max_bhb_k;
+ }
+
+ return k;
+}
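
For reference, what the per-list k values mean here:

	/*
	 * k is the loop count the mitigation vector gets patched with (see
	 * spectre_bhb_patch_loop_iter() below): 32, 24 or 8 branches to
	 * overwrite the branch history, depending on which list the CPU is
	 * on. SCOPE_SYSTEM returns the worst (largest) k seen so far.
	 */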
+
+static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void)
+{
+ int ret;
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_3, &res);
+
+ ret = res.a0;
+ switch (ret) {
+ case SMCCC_RET_SUCCESS:
+ return SPECTRE_MITIGATED;
+ case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
+ return SPECTRE_UNAFFECTED;
+ default:
+ fallthrough;
+ case SMCCC_RET_NOT_SUPPORTED:
+ return SPECTRE_VULNERABLE;
+ }
+}
+
+static bool is_spectre_bhb_fw_affected(int scope)
+{
+ static bool system_affected;
+ enum mitigation_state fw_state;
+ bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE;
+ static const struct midr_range spectre_bhb_firmware_mitigated_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
+ {},
+ };
+ bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(),
+ spectre_bhb_firmware_mitigated_list);
+
+ if (scope != SCOPE_LOCAL_CPU)
+ return system_affected;
+
+ fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
+ if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) {
+ system_affected = true;
+ return true;
+ }
+
+ return false;
+}
+
+static bool supports_ecbhb(int scope)
+{
+ u64 mmfr1;
+
+ if (scope == SCOPE_LOCAL_CPU)
+ mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1);
+ else
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+
+ return cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_ECBHB_SHIFT);
+}
+
+bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+ if (supports_csv2p3(scope))
+ return false;
+
+ if (supports_clearbhb(scope))
+ return true;
+
+ if (spectre_bhb_loop_affected(scope))
+ return true;
+
+ if (is_spectre_bhb_fw_affected(scope))
+ return true;
+
+ return false;
+}
+
+static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
+{
+ const char *v = arm64_get_bp_hardening_vector(slot);
+
+ if (slot < 0)
+ return;
+
+ __this_cpu_write(this_cpu_vector, v);
+
+ /*
+ * When KPTI is in use, the vectors are switched when exiting to
+ * user-space.
+ */
+ if (arm64_kernel_unmapped_at_el0())
+ return;
+
+ write_sysreg(v, vbar_el1);
+ isb();
+}
+
+#ifdef CONFIG_KVM
+static int kvm_bhb_get_vecs_size(const char *start)
+{
+ if (start == __smccc_workaround_3_smc)
+ return __SMCCC_WORKAROUND_3_SMC_SZ;
+ else if (start == __spectre_bhb_loop_k8 ||
+ start == __spectre_bhb_loop_k24 ||
+ start == __spectre_bhb_loop_k32)
+ return __SPECTRE_BHB_LOOP_SZ;
+ else if (start == __spectre_bhb_clearbhb)
+ return __SPECTRE_BHB_CLEARBHB_SZ;
+
+ return 0;
+}
+
+static void kvm_setup_bhb_slot(const char *hyp_vecs_start)
+{
+ int cpu, slot = -1, size;
+ const char *hyp_vecs_end;
+
+ if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
+ return;
+
+ size = kvm_bhb_get_vecs_size(hyp_vecs_start);
+ if (WARN_ON_ONCE(!hyp_vecs_start || !size))
+ return;
+ hyp_vecs_end = hyp_vecs_start + size;
+
+ raw_spin_lock(&bp_lock);
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(bp_hardening_data.template_start, cpu) == hyp_vecs_start) {
+ slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
+ break;
+ }
+ }
+
+ if (slot == -1) {
+ slot = atomic_inc_return(&arm64_el2_vector_last_slot);
+ BUG_ON(slot >= BP_HARDEN_EL2_SLOTS);
+ __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
+ }
+
+ __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
+ __this_cpu_write(bp_hardening_data.template_start, hyp_vecs_start);
+ raw_spin_unlock(&bp_lock);
+}
+#else
+#define __smccc_workaround_3_smc NULL
+#define __spectre_bhb_loop_k8 NULL
+#define __spectre_bhb_loop_k24 NULL
+#define __spectre_bhb_loop_k32 NULL
+#define __spectre_bhb_clearbhb NULL
+
+static void kvm_setup_bhb_slot(const char *hyp_vecs_start) { }
+#endif /* CONFIG_KVM */
+
+void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
+{
+ enum mitigation_state fw_state, state = SPECTRE_VULNERABLE;
+
+ if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU))
+ return;
+
+ if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) {
+ /* No point mitigating Spectre-BHB alone. */
+ } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) {
+ pr_info_once("spectre-bhb mitigation disabled by compile time option\n");
+ } else if (cpu_mitigations_off()) {
+ pr_info_once("spectre-bhb mitigation disabled by command line option\n");
+ } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) {
+ state = SPECTRE_MITIGATED;
+ } else if (supports_clearbhb(SCOPE_LOCAL_CPU)) {
+ kvm_setup_bhb_slot(__spectre_bhb_clearbhb);
+ this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN);
+
+ state = SPECTRE_MITIGATED;
+ } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) {
+ switch (spectre_bhb_loop_affected(SCOPE_SYSTEM)) {
+ case 8:
+ kvm_setup_bhb_slot(__spectre_bhb_loop_k8);
+ break;
+ case 24:
+ kvm_setup_bhb_slot(__spectre_bhb_loop_k24);
+ break;
+ case 32:
+ kvm_setup_bhb_slot(__spectre_bhb_loop_k32);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP);
+
+ state = SPECTRE_MITIGATED;
+ } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) {
+ fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
+ if (fw_state == SPECTRE_MITIGATED) {
+ kvm_setup_bhb_slot(__smccc_workaround_3_smc);
+ this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
+
+ state = SPECTRE_MITIGATED;
+ }
+ }
+
+ update_mitigation_state(&spectre_bhb_state, state);
+}
+
+/* Patched to correct the immediate */
+void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ u8 rd;
+ u32 insn;
+ u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM);
+
+ BUG_ON(nr_inst != 1); /* MOV -> MOV */
+
+ if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY))
+ return;
+
+ insn = le32_to_cpu(*origptr);
+ rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
+ insn = aarch64_insn_gen_movewide(rd, loop_count, 0,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_ZERO);
+ *updptr++ = cpu_to_le32(insn);
+}
+
+#ifdef CONFIG_BPF_SYSCALL
+#define EBPF_WARN "Unprivileged eBPF is enabled, data leaks possible via Spectre v2 BHB attacks!\n"
+void unpriv_ebpf_notify(int new_state)
+{
+ if (spectre_v2_state == SPECTRE_VULNERABLE ||
+ spectre_bhb_state != SPECTRE_MITIGATED)
+ return;
+
+ if (!new_state)
+ pr_err("WARNING: %s", EBPF_WARN);
+}
+#endif
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 8a95a01..2817e39 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -34,7 +34,7 @@
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
#include <asm/fpsimd.h>
-#include <asm/pgtable.h>
+#include <asm/mte.h>
#include <asm/pointer_auth.h>
#include <asm/stacktrace.h>
#include <asm/syscall.h>
@@ -475,11 +475,10 @@
static int hw_break_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
unsigned int note_type = regset->core_note_type;
- int ret, idx = 0, offset, limit;
+ int ret, idx = 0;
u32 info, ctrl;
u64 addr;
@@ -488,49 +487,21 @@
if (ret)
return ret;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &info, 0,
- sizeof(info));
- if (ret)
- return ret;
-
- /* Pad */
- offset = offsetof(struct user_hwdebug_state, pad);
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, offset,
- offset + PTRACE_HBP_PAD_SZ);
- if (ret)
- return ret;
-
+ membuf_write(&to, &info, sizeof(info));
+ membuf_zero(&to, sizeof(u32));
/* (address, ctrl) registers */
- offset = offsetof(struct user_hwdebug_state, dbg_regs);
- limit = regset->n * regset->size;
- while (count && offset < limit) {
+ while (to.left) {
ret = ptrace_hbp_get_addr(note_type, target, idx, &addr);
if (ret)
return ret;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &addr,
- offset, offset + PTRACE_HBP_ADDR_SZ);
- if (ret)
- return ret;
- offset += PTRACE_HBP_ADDR_SZ;
-
ret = ptrace_hbp_get_ctrl(note_type, target, idx, &ctrl);
if (ret)
return ret;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &ctrl,
- offset, offset + PTRACE_HBP_CTRL_SZ);
- if (ret)
- return ret;
- offset += PTRACE_HBP_CTRL_SZ;
-
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- offset,
- offset + PTRACE_HBP_PAD_SZ);
- if (ret)
- return ret;
- offset += PTRACE_HBP_PAD_SZ;
+ membuf_store(&to, addr);
+ membuf_store(&to, ctrl);
+ membuf_zero(&to, sizeof(u32));
idx++;
}
-
return 0;
}
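
For reference, a rough sketch of the membuf interface these conversions target (paraphrased from include/linux/regset.h; treat details as approximate):

	struct membuf {
		void *p;	/* current write position */
		size_t left;	/* bytes remaining in the caller's buffer */
	};

	/* membuf_write() copies min(size, left) bytes and advances;
	 * membuf_zero() does the same with zeroes; membuf_store(&to, v)
	 * writes v using its own type's size. Each returns the space left,
	 * so a full buffer truncates quietly instead of erroring. */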
@@ -590,11 +561,10 @@
static int gpr_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct user_pt_regs *uregs = &task_pt_regs(target)->user_regs;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
+ return membuf_write(&to, uregs, sizeof(*uregs));
}
static int gpr_set(struct task_struct *target, const struct user_regset *regset,
@@ -627,8 +597,7 @@
*/
static int __fpr_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf, unsigned int start_pos)
+ struct membuf to)
{
struct user_fpsimd_state *uregs;
@@ -636,13 +605,11 @@
uregs = &target->thread.uw.fpsimd_state;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
- start_pos, start_pos + sizeof(*uregs));
+ return membuf_write(&to, uregs, sizeof(*uregs));
}
static int fpr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
if (!system_supports_fpsimd())
return -EINVAL;
@@ -650,7 +617,7 @@
if (target == current)
fpsimd_preserve_current_state();
- return __fpr_get(target, regset, pos, count, kbuf, ubuf, 0);
+ return __fpr_get(target, regset, to);
}
static int __fpr_set(struct task_struct *target,
@@ -700,15 +667,12 @@
}
static int tls_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- unsigned long *tls = &target->thread.uw.tp_value;
-
if (target == current)
tls_preserve_current_state();
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1);
+ return membuf_store(&to, target->thread.uw.tp_value);
}
static int tls_set(struct task_struct *target, const struct user_regset *regset,
@@ -728,13 +692,9 @@
static int system_call_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- int syscallno = task_pt_regs(target)->syscallno;
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &syscallno, 0, -1);
+ return membuf_store(&to, task_pt_regs(target)->syscallno);
}
static int system_call_set(struct task_struct *target,
@@ -781,24 +741,10 @@
return ALIGN(header->size, SVE_VQ_BYTES);
}
-static unsigned int sve_get_size(struct task_struct *target,
- const struct user_regset *regset)
-{
- struct user_sve_header header;
-
- if (!system_supports_sve())
- return 0;
-
- sve_init_header_from_task(&header, target);
- return sve_size_from_header(&header);
-}
-
static int sve_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- int ret;
struct user_sve_header header;
unsigned int vq;
unsigned long start, end;
@@ -810,10 +756,7 @@
sve_init_header_from_task(&header, target);
vq = sve_vq_from_vl(header.vl);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header,
- 0, sizeof(header));
- if (ret)
- return ret;
+ membuf_write(&to, &header, sizeof(header));
if (target == current)
fpsimd_preserve_current_state();
@@ -822,26 +765,18 @@
BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD)
- return __fpr_get(target, regset, pos, count, kbuf, ubuf,
- SVE_PT_FPSIMD_OFFSET);
+ return __fpr_get(target, regset, to);
/* Otherwise: full SVE case */
BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
start = SVE_PT_SVE_OFFSET;
end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- target->thread.sve_state,
- start, end);
- if (ret)
- return ret;
+ membuf_write(&to, target->thread.sve_state, end - start);
start = end;
end = SVE_PT_SVE_FPSR_OFFSET(vq);
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- start, end);
- if (ret)
- return ret;
+ membuf_zero(&to, end - start);
/*
* Copy fpsr, and fpcr which must follow contiguously in
@@ -849,16 +784,11 @@
*/
start = end;
end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.uw.fpsimd_state.fpsr,
- start, end);
- if (ret)
- return ret;
+ membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr, end - start);
start = end;
end = sve_size_from_header(&header);
- return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- start, end);
+ return membuf_zero(&to, end - start);
}
static int sve_set(struct task_struct *target,
@@ -962,8 +892,7 @@
#ifdef CONFIG_ARM64_PTR_AUTH
static int pac_mask_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
/*
* The PAC bits can differ across data and instruction pointers
@@ -979,7 +908,7 @@
if (!system_supports_address_auth())
return -EINVAL;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &uregs, 0, -1);
+ return membuf_write(&to, &uregs, sizeof(uregs));
}
#ifdef CONFIG_CHECKPOINT_RESTORE
@@ -999,7 +928,7 @@
}
static void pac_address_keys_to_user(struct user_pac_address_keys *ukeys,
- const struct ptrauth_keys *keys)
+ const struct ptrauth_keys_user *keys)
{
ukeys->apiakey = pac_key_to_user(&keys->apia);
ukeys->apibkey = pac_key_to_user(&keys->apib);
@@ -1007,7 +936,7 @@
ukeys->apdbkey = pac_key_to_user(&keys->apdb);
}
-static void pac_address_keys_from_user(struct ptrauth_keys *keys,
+static void pac_address_keys_from_user(struct ptrauth_keys_user *keys,
const struct user_pac_address_keys *ukeys)
{
keys->apia = pac_key_from_user(ukeys->apiakey);
@@ -1018,10 +947,9 @@
static int pac_address_keys_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- struct ptrauth_keys *keys = &target->thread.keys_user;
+ struct ptrauth_keys_user *keys = &target->thread.keys_user;
struct user_pac_address_keys user_keys;
if (!system_supports_address_auth())
@@ -1029,8 +957,7 @@
pac_address_keys_to_user(&user_keys, keys);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &user_keys, 0, -1);
+ return membuf_write(&to, &user_keys, sizeof(user_keys));
}
static int pac_address_keys_set(struct task_struct *target,
@@ -1038,7 +965,7 @@
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct ptrauth_keys *keys = &target->thread.keys_user;
+ struct ptrauth_keys_user *keys = &target->thread.keys_user;
struct user_pac_address_keys user_keys;
int ret;
@@ -1056,12 +983,12 @@
}
static void pac_generic_keys_to_user(struct user_pac_generic_keys *ukeys,
- const struct ptrauth_keys *keys)
+ const struct ptrauth_keys_user *keys)
{
ukeys->apgakey = pac_key_to_user(&keys->apga);
}
-static void pac_generic_keys_from_user(struct ptrauth_keys *keys,
+static void pac_generic_keys_from_user(struct ptrauth_keys_user *keys,
const struct user_pac_generic_keys *ukeys)
{
keys->apga = pac_key_from_user(ukeys->apgakey);
@@ -1069,10 +996,9 @@
static int pac_generic_keys_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- struct ptrauth_keys *keys = &target->thread.keys_user;
+ struct ptrauth_keys_user *keys = &target->thread.keys_user;
struct user_pac_generic_keys user_keys;
if (!system_supports_generic_auth())
@@ -1080,8 +1006,7 @@
pac_generic_keys_to_user(&user_keys, keys);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &user_keys, 0, -1);
+ return membuf_write(&to, &user_keys, sizeof(user_keys));
}
static int pac_generic_keys_set(struct task_struct *target,
@@ -1089,7 +1014,7 @@
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct ptrauth_keys *keys = &target->thread.keys_user;
+ struct ptrauth_keys_user *keys = &target->thread.keys_user;
struct user_pac_generic_keys user_keys;
int ret;
@@ -1108,6 +1033,35 @@
#endif /* CONFIG_CHECKPOINT_RESTORE */
#endif /* CONFIG_ARM64_PTR_AUTH */
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+static int tagged_addr_ctrl_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ long ctrl = get_tagged_addr_ctrl(target);
+
+ if (IS_ERR_VALUE(ctrl))
+ return ctrl;
+
+ return membuf_write(&to, &ctrl, sizeof(ctrl));
+}
+
+static int tagged_addr_ctrl_set(struct task_struct *target, const struct
+ user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf, const
+ void __user *ubuf)
+{
+ int ret;
+ long ctrl;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
+ if (ret)
+ return ret;
+
+ return set_tagged_addr_ctrl(target, ctrl);
+}
+#endif
+
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
@@ -1127,6 +1081,9 @@
REGSET_PACG_KEYS,
#endif
#endif
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+ REGSET_TAGGED_ADDR_CTRL,
+#endif
};
static const struct user_regset aarch64_regsets[] = {
@@ -1135,7 +1092,7 @@
.n = sizeof(struct user_pt_regs) / sizeof(u64),
.size = sizeof(u64),
.align = sizeof(u64),
- .get = gpr_get,
+ .regset_get = gpr_get,
.set = gpr_set
},
[REGSET_FPR] = {
@@ -1148,7 +1105,7 @@
.size = sizeof(u32),
.align = sizeof(u32),
.active = fpr_active,
- .get = fpr_get,
+ .regset_get = fpr_get,
.set = fpr_set
},
[REGSET_TLS] = {
@@ -1156,7 +1113,7 @@
.n = 1,
.size = sizeof(void *),
.align = sizeof(void *),
- .get = tls_get,
+ .regset_get = tls_get,
.set = tls_set,
},
#ifdef CONFIG_HAVE_HW_BREAKPOINT
@@ -1165,7 +1122,7 @@
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
- .get = hw_break_get,
+ .regset_get = hw_break_get,
.set = hw_break_set,
},
[REGSET_HW_WATCH] = {
@@ -1173,7 +1130,7 @@
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
- .get = hw_break_get,
+ .regset_get = hw_break_get,
.set = hw_break_set,
},
#endif
@@ -1182,7 +1139,7 @@
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
- .get = system_call_get,
+ .regset_get = system_call_get,
.set = system_call_set,
},
#ifdef CONFIG_ARM64_SVE
@@ -1192,9 +1149,8 @@
SVE_VQ_BYTES),
.size = SVE_VQ_BYTES,
.align = SVE_VQ_BYTES,
- .get = sve_get,
+ .regset_get = sve_get,
.set = sve_set,
- .get_size = sve_get_size,
},
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
@@ -1203,7 +1159,7 @@
.n = sizeof(struct user_pac_mask) / sizeof(u64),
.size = sizeof(u64),
.align = sizeof(u64),
- .get = pac_mask_get,
+ .regset_get = pac_mask_get,
/* this cannot be set dynamically */
},
#ifdef CONFIG_CHECKPOINT_RESTORE
@@ -1212,7 +1168,7 @@
.n = sizeof(struct user_pac_address_keys) / sizeof(__uint128_t),
.size = sizeof(__uint128_t),
.align = sizeof(__uint128_t),
- .get = pac_address_keys_get,
+ .regset_get = pac_address_keys_get,
.set = pac_address_keys_set,
},
[REGSET_PACG_KEYS] = {
@@ -1220,11 +1176,21 @@
.n = sizeof(struct user_pac_generic_keys) / sizeof(__uint128_t),
.size = sizeof(__uint128_t),
.align = sizeof(__uint128_t),
- .get = pac_generic_keys_get,
+ .regset_get = pac_generic_keys_get,
.set = pac_generic_keys_set,
},
#endif
#endif
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+ [REGSET_TAGGED_ADDR_CTRL] = {
+ .core_note_type = NT_ARM_TAGGED_ADDR_CTRL,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = tagged_addr_ctrl_get,
+ .set = tagged_addr_ctrl_set,
+ },
+#endif
};
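
NT_ARM_TAGGED_ADDR_CTRL exposes the tagged-address-ABI prctl() state to debuggers as a one-element regset. A hedged userspace sketch of reading it (error handling elided; the fallback note number is taken from this series' UAPI <linux/elf.h>):

	#include <elf.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>

	#ifndef NT_ARM_TAGGED_ADDR_CTRL
	#define NT_ARM_TAGGED_ADDR_CTRL	0x409
	#endif

	static long read_tagged_addr_ctrl(pid_t pid)	/* illustrative helper */
	{
		long ctrl = 0;
		struct iovec iov = { .iov_base = &ctrl, .iov_len = sizeof(ctrl) };

		if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_TAGGED_ADDR_CTRL, &iov))
			return -1;
		return ctrl;	/* PR_TAGGED_ADDR_ENABLE plus PR_MTE_* bits */
	}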
static const struct user_regset_view user_aarch64_view = {
@@ -1238,57 +1204,31 @@
REGSET_COMPAT_VFP,
};
+static inline compat_ulong_t compat_get_user_reg(struct task_struct *task, int idx)
+{
+ struct pt_regs *regs = task_pt_regs(task);
+
+ switch (idx) {
+ case 15:
+ return regs->pc;
+ case 16:
+ return pstate_to_compat_psr(regs->pstate);
+ case 17:
+ return regs->orig_x0;
+ default:
+ return regs->regs[idx];
+ }
+}
+
static int compat_gpr_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- int ret = 0;
- unsigned int i, start, num_regs;
+ int i = 0;
- /* Calculate the number of AArch32 registers contained in count */
- num_regs = count / regset->size;
-
- /* Convert pos into an register number */
- start = pos / regset->size;
-
- if (start + num_regs > regset->n)
- return -EIO;
-
- for (i = 0; i < num_regs; ++i) {
- unsigned int idx = start + i;
- compat_ulong_t reg;
-
- switch (idx) {
- case 15:
- reg = task_pt_regs(target)->pc;
- break;
- case 16:
- reg = task_pt_regs(target)->pstate;
- reg = pstate_to_compat_psr(reg);
- break;
- case 17:
- reg = task_pt_regs(target)->orig_x0;
- break;
- default:
- reg = task_pt_regs(target)->regs[idx];
- }
-
- if (kbuf) {
- memcpy(kbuf, &reg, sizeof(reg));
- kbuf += sizeof(reg);
- } else {
- ret = copy_to_user(ubuf, &reg, sizeof(reg));
- if (ret) {
- ret = -EFAULT;
- break;
- }
-
- ubuf += sizeof(reg);
- }
- }
-
- return ret;
+ while (to.left)
+ membuf_store(&to, compat_get_user_reg(target, i++));
+ return 0;
}
static int compat_gpr_set(struct task_struct *target,
@@ -1355,12 +1295,10 @@
static int compat_vfp_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct user_fpsimd_state *uregs;
compat_ulong_t fpscr;
- int ret, vregs_end_pos;
if (!system_supports_fpsimd())
return -EINVAL;
@@ -1374,19 +1312,10 @@
* The VFP registers are packed into the fpsimd_state, so they all sit
* nicely together for us. We just need to create the fpscr separately.
*/
- vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
- 0, vregs_end_pos);
-
- if (count && !ret) {
- fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) |
- (uregs->fpcr & VFP_FPSCR_CTRL_MASK);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpscr,
- vregs_end_pos, VFP_STATE_SIZE);
- }
-
- return ret;
+ membuf_write(&to, uregs, VFP_STATE_SIZE - sizeof(compat_ulong_t));
+ fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) |
+ (uregs->fpcr & VFP_FPSCR_CTRL_MASK);
+ return membuf_store(&to, fpscr);
}
static int compat_vfp_set(struct task_struct *target,
@@ -1421,11 +1350,10 @@
}
static int compat_tls_get(struct task_struct *target,
- const struct user_regset *regset, unsigned int pos,
- unsigned int count, void *kbuf, void __user *ubuf)
+ const struct user_regset *regset,
+ struct membuf to)
{
- compat_ulong_t tls = (compat_ulong_t)target->thread.uw.tp_value;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
+ return membuf_store(&to, (compat_ulong_t)target->thread.uw.tp_value);
}
static int compat_tls_set(struct task_struct *target,
@@ -1450,7 +1378,7 @@
.n = COMPAT_ELF_NGREG,
.size = sizeof(compat_elf_greg_t),
.align = sizeof(compat_elf_greg_t),
- .get = compat_gpr_get,
+ .regset_get = compat_gpr_get,
.set = compat_gpr_set
},
[REGSET_COMPAT_VFP] = {
@@ -1459,7 +1387,7 @@
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
.active = fpr_active,
- .get = compat_vfp_get,
+ .regset_get = compat_vfp_get,
.set = compat_vfp_set
},
};
@@ -1475,7 +1403,7 @@
.n = COMPAT_ELF_NGREG,
.size = sizeof(compat_elf_greg_t),
.align = sizeof(compat_elf_greg_t),
- .get = compat_gpr_get,
+ .regset_get = compat_gpr_get,
.set = compat_gpr_set
},
[REGSET_FPR] = {
@@ -1483,7 +1411,7 @@
.n = VFP_STATE_SIZE / sizeof(compat_ulong_t),
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
- .get = compat_vfp_get,
+ .regset_get = compat_vfp_get,
.set = compat_vfp_set
},
[REGSET_TLS] = {
@@ -1491,7 +1419,7 @@
.n = 1,
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
- .get = compat_tls_get,
+ .regset_get = compat_tls_get,
.set = compat_tls_set,
},
#ifdef CONFIG_HAVE_HW_BREAKPOINT
@@ -1500,7 +1428,7 @@
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
- .get = hw_break_get,
+ .regset_get = hw_break_get,
.set = hw_break_set,
},
[REGSET_HW_WATCH] = {
@@ -1508,7 +1436,7 @@
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
- .get = hw_break_get,
+ .regset_get = hw_break_get,
.set = hw_break_set,
},
#endif
@@ -1517,7 +1445,7 @@
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
- .get = system_call_get,
+ .regset_get = system_call_get,
.set = system_call_set,
},
};
@@ -1542,9 +1470,7 @@
else if (off == COMPAT_PT_TEXT_END_ADDR)
tmp = tsk->mm->end_code;
else if (off < sizeof(compat_elf_gregset_t))
- return copy_regset_to_user(tsk, &user_aarch32_view,
- REGSET_COMPAT_GPR, off,
- sizeof(compat_ulong_t), ret);
+ tmp = compat_get_user_reg(tsk, off >> 2);
else if (off >= COMPAT_USER_SZ)
return -EIO;
else
@@ -1556,8 +1482,8 @@
static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off,
compat_ulong_t val)
{
- int ret;
- mm_segment_t old_fs = get_fs();
+ struct pt_regs newregs = *task_pt_regs(tsk);
+ unsigned int idx = off / 4;
if (off & 3 || off >= COMPAT_USER_SZ)
return -EIO;
@@ -1565,14 +1491,25 @@
if (off >= sizeof(compat_elf_gregset_t))
return 0;
- set_fs(KERNEL_DS);
- ret = copy_regset_from_user(tsk, &user_aarch32_view,
- REGSET_COMPAT_GPR, off,
- sizeof(compat_ulong_t),
- &val);
- set_fs(old_fs);
+ switch (idx) {
+ case 15:
+ newregs.pc = val;
+ break;
+ case 16:
+ newregs.pstate = compat_psr_to_pstate(val);
+ break;
+ case 17:
+ newregs.orig_x0 = val;
+ break;
+ default:
+ newregs.regs[idx] = val;
+ }
- return ret;
+ if (!valid_user_regs(&newregs.user_regs, tsk))
+ return -EINVAL;
+
+ *task_pt_regs(tsk) = newregs;
+ return 0;
}
#ifdef CONFIG_HAVE_HW_BREAKPOINT
@@ -1797,6 +1734,12 @@
long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
+ switch (request) {
+ case PTRACE_PEEKMTETAGS:
+ case PTRACE_POKEMTETAGS:
+ return mte_ptrace_copy_tags(child, request, addr, data);
+ }
+
return ptrace_request(child, request, addr, data);
}
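
The two MTE requests are handled before falling back to ptrace_request() because they carry an iovec rather than a plain data word: one tag byte per 16-byte granule, per this series' memory-tagging ABI. A hedged tracer-side sketch (the request value comes from this series' UAPI <asm/ptrace.h>):

	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>

	#ifndef PTRACE_PEEKMTETAGS
	#define PTRACE_PEEKMTETAGS	33
	#endif

	/* Read the tags covering [addr, addr + 16 * ntags) of the tracee. */
	static long peek_mte_tags(pid_t pid, void *addr,
				  unsigned char *tags, size_t ntags)
	{
		struct iovec iov = { .iov_base = tags, .iov_len = ntags };

		return ptrace(PTRACE_PEEKMTETAGS, pid, addr, &iov);
	}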
@@ -1812,8 +1755,20 @@
unsigned long saved_reg;
/*
- * A scratch register (ip(r12) on AArch32, x7 on AArch64) is
- * used to denote syscall entry/exit:
+ * We have some ABI weirdness here in the way that we handle syscall
+ * exit stops because we indicate whether or not the stop has been
+ * signalled from syscall entry or syscall exit by clobbering a general
+ * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
+ * and restoring its old value after the stop. This means that:
+ *
+ * - Any writes by the tracer to this register during the stop are
+ * ignored/discarded.
+ *
+ * - The actual value of the register is not available during the stop,
+ * so the tracer cannot save it and restore it later.
+ *
+ * - Syscall stops behave differently to seccomp and pseudo-step traps
+ * (the latter do not nobble any registers).
*/
regno = (is_compat_task() ? 12 : 7);
saved_reg = regs->regs[regno];
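
From the tracer's point of view, the expanded comment means that at a syscall-entry or syscall-exit stop the scratch register carries only the stop marker. A hedged illustration (assuming glibc's ptrace wrapper and the arm64 user_pt_regs layout):

	#include <elf.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>
	#include <asm/ptrace.h>		/* struct user_pt_regs */

	/* Illustrative only: x7 is not live state during a syscall stop. */
	static unsigned long long syscall_stop_marker(pid_t pid)
	{
		struct user_pt_regs regs;
		struct iovec iov = { .iov_base = &regs, .iov_len = sizeof(regs) };

		ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov);
		return regs.regs[7];	/* entry/exit marker, not the real x7 */
	}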
@@ -1845,12 +1800,12 @@
if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
if (flags & _TIF_SYSCALL_EMU)
- return -1;
+ return NO_SYSCALL;
}
/* Do the secure computing after ptrace; failures should be fast. */
- if (secure_computing(NULL) == -1)
- return -1;
+ if (secure_computing() == -1)
+ return NO_SYSCALL;
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_enter(regs, regs->syscallno);
@@ -1887,8 +1842,8 @@
* We also reserve IL for the kernel; SS is handled dynamically.
*/
#define SPSR_EL1_AARCH64_RES0_BITS \
- (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \
- GENMASK_ULL(20, 13) | GENMASK_ULL(11, 10) | GENMASK_ULL(5, 5))
+ (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 26) | GENMASK_ULL(23, 22) | \
+ GENMASK_ULL(20, 13) | GENMASK_ULL(5, 5))
#define SPSR_EL1_AARCH32_RES0_BITS \
(GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20))
diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S
index 16a34f1..c50f45f 100644
--- a/arch/arm64/kernel/reloc_test_syms.S
+++ b/arch/arm64/kernel/reloc_test_syms.S
@@ -5,81 +5,81 @@
#include <linux/linkage.h>
-ENTRY(absolute_data64)
+SYM_FUNC_START(absolute_data64)
ldr x0, 0f
ret
0: .quad sym64_abs
-ENDPROC(absolute_data64)
+SYM_FUNC_END(absolute_data64)
-ENTRY(absolute_data32)
+SYM_FUNC_START(absolute_data32)
ldr w0, 0f
ret
0: .long sym32_abs
-ENDPROC(absolute_data32)
+SYM_FUNC_END(absolute_data32)
-ENTRY(absolute_data16)
+SYM_FUNC_START(absolute_data16)
adr x0, 0f
ldrh w0, [x0]
ret
0: .short sym16_abs, 0
-ENDPROC(absolute_data16)
+SYM_FUNC_END(absolute_data16)
-ENTRY(signed_movw)
+SYM_FUNC_START(signed_movw)
movz x0, #:abs_g2_s:sym64_abs
movk x0, #:abs_g1_nc:sym64_abs
movk x0, #:abs_g0_nc:sym64_abs
ret
-ENDPROC(signed_movw)
+SYM_FUNC_END(signed_movw)
-ENTRY(unsigned_movw)
+SYM_FUNC_START(unsigned_movw)
movz x0, #:abs_g3:sym64_abs
movk x0, #:abs_g2_nc:sym64_abs
movk x0, #:abs_g1_nc:sym64_abs
movk x0, #:abs_g0_nc:sym64_abs
ret
-ENDPROC(unsigned_movw)
+SYM_FUNC_END(unsigned_movw)
.align 12
.space 0xff8
-ENTRY(relative_adrp)
+SYM_FUNC_START(relative_adrp)
adrp x0, sym64_rel
add x0, x0, #:lo12:sym64_rel
ret
-ENDPROC(relative_adrp)
+SYM_FUNC_END(relative_adrp)
.align 12
.space 0xffc
-ENTRY(relative_adrp_far)
+SYM_FUNC_START(relative_adrp_far)
adrp x0, memstart_addr
add x0, x0, #:lo12:memstart_addr
ret
-ENDPROC(relative_adrp_far)
+SYM_FUNC_END(relative_adrp_far)
-ENTRY(relative_adr)
+SYM_FUNC_START(relative_adr)
adr x0, sym64_rel
ret
-ENDPROC(relative_adr)
+SYM_FUNC_END(relative_adr)
-ENTRY(relative_data64)
+SYM_FUNC_START(relative_data64)
adr x1, 0f
ldr x0, [x1]
add x0, x0, x1
ret
0: .quad sym64_rel - .
-ENDPROC(relative_data64)
+SYM_FUNC_END(relative_data64)
-ENTRY(relative_data32)
+SYM_FUNC_START(relative_data32)
adr x1, 0f
ldr w0, [x1]
add x0, x0, x1
ret
0: .long sym64_rel - .
-ENDPROC(relative_data32)
+SYM_FUNC_END(relative_data32)
-ENTRY(relative_data16)
+SYM_FUNC_START(relative_data16)
adr x1, 0f
ldrsh w0, [x1]
add x0, x0, x1
ret
0: .short sym64_rel - ., 0
-ENDPROC(relative_data16)
+SYM_FUNC_END(relative_data16)
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index c1d7db7..84eec95 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -26,7 +26,7 @@
* control_code_page, a special page which has been set up to be preserved
* during the copy operation.
*/
-ENTRY(arm64_relocate_new_kernel)
+SYM_CODE_START(arm64_relocate_new_kernel)
/* Setup the list loop variables. */
mov x18, x2 /* x18 = dtb address */
@@ -36,18 +36,6 @@
mov x14, xzr /* x14 = entry ptr */
mov x13, xzr /* x13 = copy dest */
- /* Clear the sctlr_el2 flags. */
- mrs x0, CurrentEL
- cmp x0, #CurrentEL_EL2
- b.ne 1f
- mrs x0, sctlr_el2
- ldr x1, =SCTLR_ELx_FLAGS
- bic x0, x0, x1
- pre_disable_mmu_workaround
- msr sctlr_el2, x0
- isb
-1:
-
/* Check if the new image needs relocation. */
tbnz x16, IND_DONE_BIT, .Ldone
@@ -111,9 +99,7 @@
mov x3, xzr
br x17
-ENDPROC(arm64_relocate_new_kernel)
-
-.ltorg
+SYM_CODE_END(arm64_relocate_new_kernel)
.align 3 /* To keep the 64-bit values below naturally aligned. */
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index a5e8b3b..a6d1875 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -18,16 +18,16 @@
void *addr;
};
-static int save_return_addr(struct stackframe *frame, void *d)
+static bool save_return_addr(void *d, unsigned long pc)
{
struct return_address_data *data = d;
if (!data->level) {
- data->addr = (void *)frame->pc;
- return 1;
+ data->addr = (void *)pc;
+ return false;
} else {
--data->level;
- return 0;
+ return true;
}
}
NOKPROBE_SYMBOL(save_return_addr);
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 0000000..e8f7ff4
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <linux/scs.h>
+
+DEFINE_SCS(irq_shadow_call_stack);
+
+#ifdef CONFIG_ARM_SDE_INTERFACE
+DEFINE_SCS(sdei_shadow_call_stack_normal);
+DEFINE_SCS(sdei_shadow_call_stack_critical);
+#endif
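
DEFINE_SCS() comes from <linux/scs.h> and statically carves out one shadow call stack per CPU. Paraphrasing the generic definition as of this series (consult include/linux/scs.h for the authoritative form):

	/* Paraphrase, not part of this patch. */
	#define DEFINE_SCS(name)					\
		DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name)

So the file above gives the IRQ path, and optionally the two SDEI priorities, their own per-CPU return-address stacks under CONFIG_SHADOW_CALL_STACK.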
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index ea94cf8..793c46d 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -2,6 +2,7 @@
// Copyright (C) 2017 Arm Ltd.
#define pr_fmt(fmt) "sdei: " fmt
+#include <linux/arm-smccc.h>
#include <linux/arm_sdei.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
@@ -9,6 +10,7 @@
#include <linux/uaccess.h>
#include <asm/alternative.h>
+#include <asm/exception.h>
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/ptrace.h>
@@ -94,19 +96,7 @@
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
- if (!low)
- return false;
-
- if (sp < low || sp >= high)
- return false;
-
- if (info) {
- info->low = low;
- info->high = high;
- info->type = STACK_TYPE_SDEI_NORMAL;
- }
-
- return true;
+ return on_stack(sp, low, high, STACK_TYPE_SDEI_NORMAL, info);
}
static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info)
@@ -114,19 +104,7 @@
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
- if (!low)
- return false;
-
- if (sp < low || sp >= high)
- return false;
-
- if (info) {
- info->low = low;
- info->high = high;
- info->type = STACK_TYPE_SDEI_CRITICAL;
- }
-
- return true;
+ return on_stack(sp, low, high, STACK_TYPE_SDEI_CRITICAL, info);
}
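
Both helpers now delegate the bounds check and the stack_info fill-in to the common on_stack() helper from <asm/stacktrace.h>. Its shape is roughly (paraphrased, not part of this patch):

	static inline bool on_stack(unsigned long sp, unsigned long low,
				    unsigned long high, enum stack_type type,
				    struct stack_info *info)
	{
		if (!low || sp < low || sp >= high)
			return false;

		if (info) {
			info->low = low;
			info->high = high;
			info->type = type;
		}
		return true;
	}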
bool _on_sdei_stack(unsigned long sp, struct stack_info *info)
@@ -161,7 +139,7 @@
return 0;
}
- sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
+ sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
if (arm64_kernel_unmapped_at_el0()) {
@@ -203,7 +181,7 @@
/*
* We didn't take an exception to get here, set PAN. UAO will be cleared
- * by sdei_event_handler()s set_fs(USER_DS) call.
+ * by sdei_event_handler()'s force_uaccess_begin() call.
*/
__uaccess_enable_hw_pan();
@@ -246,26 +224,16 @@
}
-asmlinkage __kprobes notrace unsigned long
+asmlinkage noinstr unsigned long
__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
{
unsigned long ret;
- bool do_nmi_exit = false;
- /*
- * nmi_enter() deals with printk() re-entrance and use of RCU when
- * RCU believed this CPU was idle. Because critical events can
- * interrupt normal events, we may already be in_nmi().
- */
- if (!in_nmi()) {
- nmi_enter();
- do_nmi_exit = true;
- }
+ arm64_enter_nmi(regs);
ret = _sdei_handler(regs, arg);
- if (do_nmi_exit)
- nmi_exit();
+ arm64_exit_nmi(regs);
return ret;
}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f55f4a1..eb4b246 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -217,7 +217,7 @@
if (!standard_resources)
panic("%s: Failed to allocate %zu bytes\n", __func__, res_size);
- for_each_memblock(memory, region) {
+ for_each_mem_region(region) {
res = &standard_resources[i++];
if (memblock_is_nomap(region)) {
res->name = "reserved";
@@ -257,7 +257,7 @@
if (!memblock_is_region_reserved(mem->start, mem_size))
continue;
- for_each_reserved_mem_region(j, &r_start, &r_end) {
+ for_each_reserved_mem_range(j, &r_start, &r_end) {
resource_size_t start, end;
start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start);
@@ -280,9 +280,8 @@
{
return __cpu_logical_map[cpu];
}
-EXPORT_SYMBOL_GPL(cpu_logical_map);
-void __init setup_arch(char **cmdline_p)
+void __init __no_sanitize_address setup_arch(char **cmdline_p)
{
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
@@ -291,6 +290,13 @@
*cmdline_p = boot_command_line;
+ /*
+ * If we know now that we are going to need KPTI, use non-global
+ * mappings from the start, avoiding the cost of rewriting
+ * everything later.
+ */
+ arm64_use_ng_mappings = kaslr_requires_kpti();
+
early_fixmap_init();
early_ioremap_init();
@@ -318,6 +324,10 @@
xen_early_init();
efi_init();
+
+ if (!efi_enabled(EFI_BOOT) && ((u64)_text % MIN_KIMG_ALIGN) != 0)
+ pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
+
arm64_memblock_init();
paging_init();
@@ -343,7 +353,7 @@
else
psci_acpi_init();
- cpu_read_bootcpu_ops();
+ init_bootcpu_ops();
smp_init_cpus();
smp_build_mpidr_hash();
@@ -359,9 +369,6 @@
init_task.thread_info.ttbr0 = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
#endif
-#ifdef CONFIG_VT
- conswitchp = &dummy_con;
-#endif
if (boot_args[1] || boot_args[2] || boot_args[3]) {
pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n"
"\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n"
@@ -373,8 +380,10 @@
static inline bool cpu_can_disable(unsigned int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_can_disable)
- return cpu_ops[cpu]->cpu_can_disable(cpu);
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
+ if (ops && ops->cpu_can_disable)
+ return ops->cpu_can_disable(cpu);
#endif
return false;
}
@@ -396,11 +405,7 @@
}
subsys_initcall(topology_init);
-/*
- * Dump out kernel offset information on panic.
- */
-static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
- void *p)
+static void dump_kernel_offset(void)
{
const unsigned long offset = kaslr_offset();
@@ -411,17 +416,25 @@
} else {
pr_emerg("Kernel Offset: disabled\n");
}
+}
+
+static int arm64_panic_block_dump(struct notifier_block *self,
+ unsigned long v, void *p)
+{
+ dump_kernel_offset();
+ dump_cpu_features();
+ dump_mem_limit();
return 0;
}
-static struct notifier_block kernel_offset_notifier = {
- .notifier_call = dump_kernel_offset
+static struct notifier_block arm64_panic_block = {
+ .notifier_call = arm64_panic_block_dump
};
-static int __init register_kernel_offset_dumper(void)
+static int __init register_arm64_panic_block(void)
{
atomic_notifier_chain_register(&panic_notifier_list,
- &kernel_offset_notifier);
+ &arm64_panic_block);
return 0;
}
-__initcall(register_kernel_offset_dumper);
+device_initcall(register_arm64_panic_block);
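
The rename reflects that the panic hook now dumps more than the KASLR offset. The registration pattern itself is the stock atomic-notifier one; a minimal, self-contained sketch for any code wanting a similar hook (all names here are illustrative):

	#include <linux/init.h>
	#include <linux/kernel.h>	/* panic_notifier_list on v5.10 */
	#include <linux/notifier.h>

	static int my_panic_dump(struct notifier_block *self,
				 unsigned long v, void *p)
	{
		pr_emerg("example panic hook\n");
		return 0;
	}

	static struct notifier_block my_panic_block = {
		.notifier_call = my_panic_dump,
	};

	static int __init register_my_panic_block(void)
	{
		atomic_notifier_chain_register(&panic_notifier_list,
					       &my_panic_block);
		return 0;
	}
	device_initcall(register_my_panic_block);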
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index f6d3278..e620053 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -245,7 +245,8 @@
if (vq) {
/*
* This assumes that the SVE state has already been saved to
- * the task struct by calling preserve_fpsimd_context().
+ * the task struct by calling the function
+ * fpsimd_signal_preserve_current_state().
*/
err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET,
current->thread.sve_state,
@@ -372,6 +373,8 @@
goto done;
case FPSIMD_MAGIC:
+ if (!system_supports_fpsimd())
+ goto invalid;
if (user->fpsimd)
goto invalid;
@@ -507,7 +510,7 @@
if (err == 0)
err = parse_user_sigframe(&user, sf);
- if (err == 0) {
+ if (err == 0 && system_supports_fpsimd()) {
if (!user.fpsimd)
return -EINVAL;
@@ -624,7 +627,7 @@
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
- if (err == 0) {
+ if (err == 0 && system_supports_fpsimd()) {
struct fpsimd_context __user *fpsimd_ctx =
apply_user_offset(user, user->fpsimd_offset);
err |= preserve_fpsimd_context(fpsimd_ctx);
@@ -731,6 +734,25 @@
regs->regs[29] = (unsigned long)&user->next_frame->fp;
regs->pc = (unsigned long)ka->sa.sa_handler;
+ /*
+ * Signal delivery is a (wacky) indirect function call in
+ * userspace, so simulate the same setting of BTYPE as a BLR
+ * <register containing the signal handler entry point>.
+ * Signal delivery to a location in a PROT_BTI guarded page
+ * that is not a function entry point will now trigger a
+ * SIGILL in userspace.
+ *
+ * If the signal handler entry point is not in a PROT_BTI
+ * guarded page, this is harmless.
+ */
+ if (system_supports_bti()) {
+ regs->pstate &= ~PSR_BTYPE_MASK;
+ regs->pstate |= PSR_BTYPE_C;
+ }
+
+ /* TCO (Tag Check Override) always cleared for signal handlers */
+ regs->pstate &= ~PSR_TCO_BIT;
+
if (ka->sa.sa_flags & SA_RESTORER)
sigtramp = ka->sa.sa_restorer;
else
@@ -893,13 +915,6 @@
asmlinkage void do_notify_resume(struct pt_regs *regs,
unsigned long thread_flags)
{
- /*
- * The assembly code enters us with IRQs off, but it hasn't
- * informed the tracing code of that for efficiency reasons.
- * Update the trace code with the current status.
- */
- trace_hardirqs_off();
-
do {
/* Check valid user FS if needed */
addr_limit_user_check();
@@ -915,11 +930,16 @@
if (thread_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
+ if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
+ clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+ send_sig_fault(SIGSEGV, SEGV_MTEAERR,
+ (void __user *)NULL, current);
+ }
+
if (thread_flags & _TIF_SIGPENDING)
do_signal(regs);
if (thread_flags & _TIF_NOTIFY_RESUME) {
- clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
rseq_handle_notify_resume(NULL, regs);
}
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 12a5853..2f507f5 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -223,7 +223,7 @@
err |= !valid_user_regs(&regs->user_regs, current);
aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
- if (err == 0)
+ if (err == 0 && system_supports_fpsimd())
err |= compat_restore_vfp_context(&aux->vfp);
return err;
@@ -342,38 +342,13 @@
retcode = ptr_to_compat(ka->sa.sa_restorer);
} else {
/* Set up sigreturn pointer */
-#ifdef CONFIG_COMPAT_VDSO
- void *vdso_base = current->mm->context.vdso;
- void *vdso_trampoline;
-
- if (ka->sa.sa_flags & SA_SIGINFO) {
- if (thumb) {
- vdso_trampoline = VDSO_SYMBOL(vdso_base,
- compat_rt_sigreturn_thumb);
- } else {
- vdso_trampoline = VDSO_SYMBOL(vdso_base,
- compat_rt_sigreturn_arm);
- }
- } else {
- if (thumb) {
- vdso_trampoline = VDSO_SYMBOL(vdso_base,
- compat_sigreturn_thumb);
- } else {
- vdso_trampoline = VDSO_SYMBOL(vdso_base,
- compat_sigreturn_arm);
- }
- }
-
- retcode = ptr_to_compat(vdso_trampoline) + thumb;
-#else
unsigned int idx = thumb << 1;
if (ka->sa.sa_flags & SA_SIGINFO)
idx += 3;
- retcode = (unsigned long)current->mm->context.vdso +
+ retcode = (unsigned long)current->mm->context.sigpage +
(idx << 2) + thumb;
-#endif
}
regs->regs[0] = usig;
@@ -419,7 +394,7 @@
aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
- if (err == 0)
+ if (err == 0 && system_supports_fpsimd())
err |= compat_preserve_vfp_context(&aux->vfp);
__put_user_error(0, &aux->end_magic, err);
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index f5b04dd..ba40d57 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -3,6 +3,7 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
+#include <asm/smp.h>
.text
/*
@@ -61,7 +62,7 @@
*
* x0 = struct sleep_stack_data area
*/
-ENTRY(__cpu_suspend_enter)
+SYM_FUNC_START(__cpu_suspend_enter)
stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS]
stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16]
stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32]
@@ -94,10 +95,10 @@
ldp x29, lr, [sp], #16
mov x0, #1
ret
-ENDPROC(__cpu_suspend_enter)
+SYM_FUNC_END(__cpu_suspend_enter)
.pushsection ".idmap.text", "awx"
-ENTRY(cpu_resume)
+SYM_CODE_START(cpu_resume)
bl el2_setup // if in EL2 drop to EL1 cleanly
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
@@ -105,11 +106,11 @@
bl __enable_mmu
ldr x8, =_cpu_resume
br x8
-ENDPROC(cpu_resume)
+SYM_CODE_END(cpu_resume)
.ltorg
.popsection
-ENTRY(_cpu_resume)
+SYM_FUNC_START(_cpu_resume)
mrs x1, mpidr_el1
adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address
@@ -145,4 +146,4 @@
ldp x29, lr, [x29]
mov x0, #0
ret
-ENDPROC(_cpu_resume)
+SYM_FUNC_END(_cpu_resume)
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index 5465527..d624479 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -9,7 +9,6 @@
#include <asm/assembler.h>
.macro SMCCC instr
- .cfi_startproc
\instr #0
ldr x4, [sp]
stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
@@ -21,7 +20,6 @@
b.ne 1f
str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
1: ret
- .cfi_endproc
.endm
/*
@@ -30,9 +28,9 @@
* unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
* struct arm_smccc_quirk *quirk)
*/
-ENTRY(__arm_smccc_smc)
+SYM_FUNC_START(__arm_smccc_smc)
SMCCC smc
-ENDPROC(__arm_smccc_smc)
+SYM_FUNC_END(__arm_smccc_smc)
EXPORT_SYMBOL(__arm_smccc_smc)
/*
@@ -41,7 +39,7 @@
* unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
* struct arm_smccc_quirk *quirk)
*/
-ENTRY(__arm_smccc_hvc)
+SYM_FUNC_START(__arm_smccc_hvc)
SMCCC hvc
-ENDPROC(__arm_smccc_hvc)
+SYM_FUNC_END(__arm_smccc_hvc)
EXPORT_SYMBOL(__arm_smccc_hvc)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 987220a..feee5a3 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -30,7 +30,9 @@
#include <linux/completion.h>
#include <linux/of.h>
#include <linux/irq_work.h>
+#include <linux/kernel_stat.h>
#include <linux/kexec.h>
+#include <linux/kvm_host.h>
#include <asm/alternative.h>
#include <asm/atomic.h>
@@ -39,10 +41,9 @@
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
+#include <asm/kvm_mmu.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
#include <asm/processor.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
@@ -63,7 +64,7 @@
*/
struct secondary_data secondary_data;
/* Number of CPUs which aren't online, but looping in kernel text. */
-int cpus_stuck_in_kernel;
+static int cpus_stuck_in_kernel;
enum ipi_msg_type {
IPI_RESCHEDULE,
@@ -72,10 +73,18 @@
IPI_CPU_CRASH_STOP,
IPI_TIMER,
IPI_IRQ_WORK,
- IPI_WAKEUP
+ IPI_WAKEUP,
+ NR_IPI
};
+static int ipi_irq_base __read_mostly;
+static int nr_ipi __read_mostly = NR_IPI;
+static struct irq_desc *ipi_desc[NR_IPI] __read_mostly;
+
+static void ipi_setup(int cpu);
+
#ifdef CONFIG_HOTPLUG_CPU
+static void ipi_teardown(int cpu);
static int op_cpu_kill(unsigned int cpu);
#else
static inline int op_cpu_kill(unsigned int cpu)
@@ -91,8 +100,10 @@
*/
static int boot_secondary(unsigned int cpu, struct task_struct *idle)
{
- if (cpu_ops[cpu]->cpu_boot)
- return cpu_ops[cpu]->cpu_boot(cpu);
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
+ if (ops->cpu_boot)
+ return ops->cpu_boot(cpu);
return -EOPNOTSUPP;
}
@@ -113,63 +124,58 @@
update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
- /*
- * Now bring the CPU into our world.
- */
+ /* Now bring the CPU into our world */
ret = boot_secondary(cpu, idle);
- if (ret == 0) {
- /*
- * CPU was successfully started, wait for it to come online or
- * time out.
- */
- wait_for_completion_timeout(&cpu_running,
- msecs_to_jiffies(5000));
-
- if (!cpu_online(cpu)) {
- pr_crit("CPU%u: failed to come online\n", cpu);
- ret = -EIO;
- }
- } else {
+ if (ret) {
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
return ret;
}
+ /*
+ * CPU was successfully started, wait for it to come online or
+ * time out.
+ */
+ wait_for_completion_timeout(&cpu_running,
+ msecs_to_jiffies(5000));
+ if (cpu_online(cpu))
+ return 0;
+
+ pr_crit("CPU%u: failed to come online\n", cpu);
secondary_data.task = NULL;
secondary_data.stack = NULL;
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
status = READ_ONCE(secondary_data.status);
- if (ret && status) {
+ if (status == CPU_MMU_OFF)
+ status = READ_ONCE(__early_cpu_boot_status);
- if (status == CPU_MMU_OFF)
- status = READ_ONCE(__early_cpu_boot_status);
-
- switch (status & CPU_BOOT_STATUS_MASK) {
- default:
- pr_err("CPU%u: failed in unknown state : 0x%lx\n",
- cpu, status);
- cpus_stuck_in_kernel++;
+ switch (status & CPU_BOOT_STATUS_MASK) {
+ default:
+ pr_err("CPU%u: failed in unknown state : 0x%lx\n",
+ cpu, status);
+ cpus_stuck_in_kernel++;
+ break;
+ case CPU_KILL_ME:
+ if (!op_cpu_kill(cpu)) {
+ pr_crit("CPU%u: died during early boot\n", cpu);
break;
- case CPU_KILL_ME:
- if (!op_cpu_kill(cpu)) {
- pr_crit("CPU%u: died during early boot\n", cpu);
- break;
- }
- pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
- /* Fall through */
- case CPU_STUCK_IN_KERNEL:
- pr_crit("CPU%u: is stuck in kernel\n", cpu);
- if (status & CPU_STUCK_REASON_52_BIT_VA)
- pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
- if (status & CPU_STUCK_REASON_NO_GRAN)
- pr_crit("CPU%u: does not support %luK granule \n", cpu, PAGE_SIZE / SZ_1K);
- cpus_stuck_in_kernel++;
- break;
- case CPU_PANIC_KERNEL:
- panic("CPU%u detected unsupported configuration\n", cpu);
}
+ pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
+ fallthrough;
+ case CPU_STUCK_IN_KERNEL:
+ pr_crit("CPU%u: is stuck in kernel\n", cpu);
+ if (status & CPU_STUCK_REASON_52_BIT_VA)
+ pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
+ if (status & CPU_STUCK_REASON_NO_GRAN) {
+ pr_crit("CPU%u: does not support %luK granule\n",
+ cpu, PAGE_SIZE / SZ_1K);
+ }
+ cpus_stuck_in_kernel++;
+ break;
+ case CPU_PANIC_KERNEL:
+ panic("CPU%u detected unsupported configuration\n", cpu);
}
- return ret;
+ return -EIO;
}
static void init_gic_priority_masking(void)
@@ -194,6 +200,7 @@
{
u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
struct mm_struct *mm = &init_mm;
+ const struct cpu_operations *ops;
unsigned int cpu;
cpu = task_cpu(current);
@@ -216,7 +223,6 @@
init_gic_priority_masking();
rcu_cpu_starting(cpu);
- preempt_disable();
trace_hardirqs_off();
/*
@@ -226,8 +232,9 @@
*/
check_local_cpu_capabilities();
- if (cpu_ops[cpu]->cpu_postboot)
- cpu_ops[cpu]->cpu_postboot();
+ ops = get_cpu_ops(cpu);
+ if (ops->cpu_postboot)
+ ops->cpu_postboot();
/*
* Log the CPU info before it is marked online and might get read.
@@ -239,6 +246,8 @@
*/
notify_cpu_starting(cpu);
+ ipi_setup(cpu);
+
store_cpu_topology(cpu);
numa_add_cpu(cpu);
@@ -265,19 +274,21 @@
#ifdef CONFIG_HOTPLUG_CPU
static int op_cpu_disable(unsigned int cpu)
{
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
/*
* If we don't have a cpu_die method, abort before we reach the point
* of no return. CPU0 may not have a cpu_ops, so test for it.
*/
- if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die)
+ if (!ops || !ops->cpu_die)
return -EOPNOTSUPP;
/*
* We may need to abort a hot unplug for some other mechanism-specific
* reason.
*/
- if (cpu_ops[cpu]->cpu_disable)
- return cpu_ops[cpu]->cpu_disable(cpu);
+ if (ops->cpu_disable)
+ return ops->cpu_disable(cpu);
return 0;
}
@@ -302,6 +313,7 @@
* and we must not schedule until we're ready to give up the cpu.
*/
set_cpu_online(cpu, false);
+ ipi_teardown(cpu);
/*
* OK - migrate IRQs away from this CPU
@@ -313,15 +325,17 @@
static int op_cpu_kill(unsigned int cpu)
{
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
/*
* If we have no means of synchronising with the dying CPU, then assume
* that it is really dead. We can only wait for an arbitrary length of
* time and hope that it's dead, so let's skip the wait and just hope.
*/
- if (!cpu_ops[cpu]->cpu_kill)
+ if (!ops->cpu_kill)
return 0;
- return cpu_ops[cpu]->cpu_kill(cpu);
+ return ops->cpu_kill(cpu);
}
/*
@@ -346,8 +360,7 @@
*/
err = op_cpu_kill(cpu);
if (err)
- pr_warn("CPU%d may not have shut down cleanly: %d\n",
- cpu, err);
+ pr_warn("CPU%d may not have shut down cleanly: %d\n", cpu, err);
}
/*
@@ -357,6 +370,7 @@
void cpu_die(void)
{
unsigned int cpu = smp_processor_id();
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
idle_task_exit();
@@ -370,12 +384,22 @@
* mechanism must perform all required cache maintenance to ensure that
* no dirty lines are lost in the process of shutting down the CPU.
*/
- cpu_ops[cpu]->cpu_die(cpu);
+ ops->cpu_die(cpu);
BUG();
}
#endif
+static void __cpu_try_die(int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
+ if (ops && ops->cpu_die)
+ ops->cpu_die(cpu);
+#endif
+}
+
/*
* Kill the calling secondary CPU, early in bringup before it is turned
* online.
@@ -390,12 +414,11 @@
set_cpu_present(cpu, 0);
rcu_report_dead(cpu);
-#ifdef CONFIG_HOTPLUG_CPU
- update_cpu_boot_status(CPU_KILL_ME);
- /* Check if we can park ourselves */
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
- cpu_ops[cpu]->cpu_die(cpu);
-#endif
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
+ update_cpu_boot_status(CPU_KILL_ME);
+ __cpu_try_die(cpu);
+ }
+
update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
cpu_park_loop();
@@ -410,6 +433,8 @@
"CPU: CPUs started in inconsistent modes");
else
pr_info("CPU: All CPU(s) started at EL1\n");
+ if (IS_ENABLED(CONFIG_KVM))
+ kvm_compute_layout();
}
void __init smp_cpus_done(unsigned int max_cpus)
@@ -487,10 +512,13 @@
*/
static int __init smp_cpu_setup(int cpu)
{
- if (cpu_read_ops(cpu))
+ const struct cpu_operations *ops;
+
+ if (init_cpu_ops(cpu))
return -ENODEV;
- if (cpu_ops[cpu]->cpu_init(cpu))
+ ops = get_cpu_ops(cpu);
+ if (ops->cpu_init(cpu))
return -ENODEV;
set_cpu_possible(cpu, true);
@@ -713,6 +741,7 @@
void __init smp_prepare_cpus(unsigned int max_cpus)
{
+ const struct cpu_operations *ops;
int err;
unsigned int cpu;
unsigned int this_cpu;
@@ -743,10 +772,11 @@
if (cpu == smp_processor_id())
continue;
- if (!cpu_ops[cpu])
+ ops = get_cpu_ops(cpu);
+ if (!ops)
continue;
- err = cpu_ops[cpu]->cpu_prepare(cpu);
+ err = ops->cpu_prepare(cpu);
if (err)
continue;
@@ -755,13 +785,6 @@
}
}
-void (*__smp_cross_call)(const struct cpumask *, unsigned int);
-
-void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int))
-{
- __smp_cross_call = fn;
-}
-
static const char *ipi_types[NR_IPI] __tracepoint_string = {
#define S(x,s) [x] = s
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
@@ -773,35 +796,25 @@
S(IPI_WAKEUP, "CPU wake-up interrupts"),
};
-static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
-{
- trace_ipi_raise(target, ipi_types[ipinr]);
- __smp_cross_call(target, ipinr);
-}
+static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
-void show_ipi_list(struct seq_file *p, int prec)
+unsigned long irq_err_count;
+
+int arch_show_interrupts(struct seq_file *p, int prec)
{
unsigned int cpu, i;
for (i = 0; i < NR_IPI; i++) {
+ unsigned int irq = irq_desc_get_irq(ipi_desc[i]);
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ",
- __get_irq_stat(cpu, ipi_irqs[i]));
+ seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
seq_printf(p, " %s\n", ipi_types[i]);
}
-}
-u64 smp_irq_stat_cpu(unsigned int cpu)
-{
- u64 sum = 0;
- int i;
-
- for (i = 0; i < NR_IPI; i++)
- sum += __get_irq_stat(cpu, ipi_irqs[i]);
-
- return sum;
+ seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
+ return 0;
}
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -824,8 +837,7 @@
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
- if (__smp_cross_call)
- smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
+ smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
}
#endif
@@ -862,10 +874,8 @@
local_irq_disable();
sdei_mask_local_cpu();
-#ifdef CONFIG_HOTPLUG_CPU
- if (cpu_ops[cpu]->cpu_die)
- cpu_ops[cpu]->cpu_die(cpu);
-#endif
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
+ __cpu_try_die(cpu);
/* just in case */
cpu_park_loop();
@@ -875,15 +885,12 @@
/*
* Main handler for inter-processor interrupts
*/
-void handle_IPI(int ipinr, struct pt_regs *regs)
+static void do_handle_IPI(int ipinr)
{
unsigned int cpu = smp_processor_id();
- struct pt_regs *old_regs = set_irq_regs(regs);
- if ((unsigned)ipinr < NR_IPI) {
+ if ((unsigned)ipinr < NR_IPI)
trace_ipi_entry_rcuidle(ipi_types[ipinr]);
- __inc_irq_stat(cpu, ipi_irqs[ipinr]);
- }
switch (ipinr) {
case IPI_RESCHEDULE:
@@ -891,21 +898,16 @@
break;
case IPI_CALL_FUNC:
- irq_enter();
generic_smp_call_function_interrupt();
- irq_exit();
break;
case IPI_CPU_STOP:
- irq_enter();
local_cpu_stop();
- irq_exit();
break;
case IPI_CPU_CRASH_STOP:
if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
- irq_enter();
- ipi_cpu_crash_stop(cpu, regs);
+ ipi_cpu_crash_stop(cpu, get_irq_regs());
unreachable();
}
@@ -913,17 +915,13 @@
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
case IPI_TIMER:
- irq_enter();
tick_receive_broadcast();
- irq_exit();
break;
#endif
#ifdef CONFIG_IRQ_WORK
case IPI_IRQ_WORK:
- irq_enter();
irq_work_run();
- irq_exit();
break;
#endif
@@ -942,7 +940,66 @@
if ((unsigned)ipinr < NR_IPI)
trace_ipi_exit_rcuidle(ipi_types[ipinr]);
- set_irq_regs(old_regs);
+}
+
+static irqreturn_t ipi_handler(int irq, void *data)
+{
+ do_handle_IPI(irq - ipi_irq_base);
+ return IRQ_HANDLED;
+}
+
+static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
+{
+ trace_ipi_raise(target, ipi_types[ipinr]);
+ __ipi_send_mask(ipi_desc[ipinr], target);
+}
+
+static void ipi_setup(int cpu)
+{
+ int i;
+
+ if (WARN_ON_ONCE(!ipi_irq_base))
+ return;
+
+ for (i = 0; i < nr_ipi; i++)
+ enable_percpu_irq(ipi_irq_base + i, 0);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void ipi_teardown(int cpu)
+{
+ int i;
+
+ if (WARN_ON_ONCE(!ipi_irq_base))
+ return;
+
+ for (i = 0; i < nr_ipi; i++)
+ disable_percpu_irq(ipi_irq_base + i);
+}
+#endif
+
+void __init set_smp_ipi_range(int ipi_base, int n)
+{
+ int i;
+
+ WARN_ON(n < NR_IPI);
+ nr_ipi = min(n, NR_IPI);
+
+ for (i = 0; i < nr_ipi; i++) {
+ int err;
+
+ err = request_percpu_irq(ipi_base + i, ipi_handler,
+ "IPI", &cpu_number);
+ WARN_ON(err);
+
+ ipi_desc[i] = irq_to_desc(ipi_base + i);
+ irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
+ }
+
+ ipi_irq_base = ipi_base;
+
+ /* Setup the boot CPU immediately */
+ ipi_setup(smp_processor_id());
}
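
With this refactor the IPIs become ordinary per-CPU interrupts, and the root irqchip driver hands the allocated range over instead of registering a cross-call hook. In this series the GICv3 driver, for instance, finishes its SGI setup with a call of this shape (paraphrased):

	/* In the irqchip driver, after allocating its 8 SGIs: */
	set_smp_ipi_range(base_sgi, 8);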
void smp_send_reschedule(int cpu)
@@ -989,8 +1046,8 @@
udelay(1);
if (num_other_online_cpus())
- pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(cpu_online_mask));
+ pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(cpu_online_mask));
sdei_mask_local_cpu();
}
@@ -1034,8 +1091,8 @@
udelay(1);
if (atomic_read(&waiting_for_crash_ipi) > 0)
- pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(&mask));
+ pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
sdei_mask_local_cpu();
}
@@ -1058,8 +1115,9 @@
{
#ifdef CONFIG_HOTPLUG_CPU
int any_cpu = raw_smp_processor_id();
+ const struct cpu_operations *ops = get_cpu_ops(any_cpu);
- if (cpu_ops[any_cpu] && cpu_ops[any_cpu]->cpu_die)
+ if (ops && ops->cpu_die)
return true;
#endif
return false;
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index c8a3fee..056772c 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -19,7 +19,7 @@
#include <asm/smp_plat.h>
extern void secondary_holding_pen(void);
-volatile unsigned long __section(.mmuoff.data.read)
+volatile unsigned long __section(".mmuoff.data.read")
secondary_holding_pen_release = INVALID_HWID;
static phys_addr_t cpu_release_addr[NR_CPUS];
@@ -83,9 +83,9 @@
/*
* We write the release address as LE regardless of the native
- * endianess of the kernel. Therefore, any boot-loaders that
+ * endianness of the kernel. Therefore, any boot-loaders that
* read this address need to convert this address to the
- * boot-loader's endianess before jumping. This is mandated by
+ * boot-loader's endianness before jumping. This is mandated by
* the boot protocol.
*/
writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr);
diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c
deleted file mode 100644
index 52cfc61..0000000
--- a/arch/arm64/kernel/ssbd.c
+++ /dev/null
@@ -1,129 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018 ARM Ltd, All Rights Reserved.
- */
-
-#include <linux/compat.h>
-#include <linux/errno.h>
-#include <linux/prctl.h>
-#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
-#include <linux/thread_info.h>
-
-#include <asm/cpufeature.h>
-
-static void ssbd_ssbs_enable(struct task_struct *task)
-{
- u64 val = is_compat_thread(task_thread_info(task)) ?
- PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
-
- task_pt_regs(task)->pstate |= val;
-}
-
-static void ssbd_ssbs_disable(struct task_struct *task)
-{
- u64 val = is_compat_thread(task_thread_info(task)) ?
- PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
-
- task_pt_regs(task)->pstate &= ~val;
-}
-
-/*
- * prctl interface for SSBD
- */
-static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
-{
- int state = arm64_get_ssbd_state();
-
- /* Unsupported */
- if (state == ARM64_SSBD_UNKNOWN)
- return -EINVAL;
-
- /* Treat the unaffected/mitigated state separately */
- if (state == ARM64_SSBD_MITIGATED) {
- switch (ctrl) {
- case PR_SPEC_ENABLE:
- return -EPERM;
- case PR_SPEC_DISABLE:
- case PR_SPEC_FORCE_DISABLE:
- return 0;
- }
- }
-
- /*
- * Things are a bit backward here: the arm64 internal API
- * *enables the mitigation* when the userspace API *disables
- * speculation*. So much fun.
- */
- switch (ctrl) {
- case PR_SPEC_ENABLE:
- /* If speculation is force disabled, enable is not allowed */
- if (state == ARM64_SSBD_FORCE_ENABLE ||
- task_spec_ssb_force_disable(task))
- return -EPERM;
- task_clear_spec_ssb_disable(task);
- clear_tsk_thread_flag(task, TIF_SSBD);
- ssbd_ssbs_enable(task);
- break;
- case PR_SPEC_DISABLE:
- if (state == ARM64_SSBD_FORCE_DISABLE)
- return -EPERM;
- task_set_spec_ssb_disable(task);
- set_tsk_thread_flag(task, TIF_SSBD);
- ssbd_ssbs_disable(task);
- break;
- case PR_SPEC_FORCE_DISABLE:
- if (state == ARM64_SSBD_FORCE_DISABLE)
- return -EPERM;
- task_set_spec_ssb_disable(task);
- task_set_spec_ssb_force_disable(task);
- set_tsk_thread_flag(task, TIF_SSBD);
- ssbd_ssbs_disable(task);
- break;
- default:
- return -ERANGE;
- }
-
- return 0;
-}
-
-int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
- unsigned long ctrl)
-{
- switch (which) {
- case PR_SPEC_STORE_BYPASS:
- return ssbd_prctl_set(task, ctrl);
- default:
- return -ENODEV;
- }
-}
-
-static int ssbd_prctl_get(struct task_struct *task)
-{
- switch (arm64_get_ssbd_state()) {
- case ARM64_SSBD_UNKNOWN:
- return -EINVAL;
- case ARM64_SSBD_FORCE_ENABLE:
- return PR_SPEC_DISABLE;
- case ARM64_SSBD_KERNEL:
- if (task_spec_ssb_force_disable(task))
- return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
- if (task_spec_ssb_disable(task))
- return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
- return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
- case ARM64_SSBD_FORCE_DISABLE:
- return PR_SPEC_ENABLE;
- default:
- return PR_SPEC_NOT_AFFECTED;
- }
-}
-
-int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
-{
- switch (which) {
- case PR_SPEC_STORE_BYPASS:
- return ssbd_prctl_get(task);
- default:
- return -ENODEV;
- }
-}
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index a336cb1..c445828 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -14,6 +14,7 @@
#include <linux/stacktrace.h>
#include <asm/irq.h>
+#include <asm/pointer_auth.h>
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
@@ -86,7 +87,7 @@
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (tsk->ret_stack &&
- (frame->pc == (unsigned long)return_to_handler)) {
+ (ptrauth_strip_insn_pac(frame->pc) == (unsigned long)return_to_handler)) {
struct ftrace_ret_stack *ret_stack;
/*
* This is a case where function graph tracer has
@@ -101,6 +102,8 @@
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+ frame->pc = ptrauth_strip_insn_pac(frame->pc);
+
/*
* Frames created upon entry from EL0 have NULL FP and PC values, so
* don't bother reporting these. Frames created by __noreturn functions
@@ -115,12 +118,12 @@
NOKPROBE_SYMBOL(unwind_frame);
void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
- int (*fn)(struct stackframe *, void *), void *data)
+ bool (*fn)(void *, unsigned long), void *data)
{
while (1) {
int ret;
- if (fn(frame, data))
+ if (!fn(data, frame->pc))
break;
ret = unwind_frame(tsk, frame);
if (ret < 0)
@@ -129,84 +132,90 @@
}
NOKPROBE_SYMBOL(walk_stackframe);
-#ifdef CONFIG_STACKTRACE
-struct stack_trace_data {
- struct stack_trace *trace;
- unsigned int no_sched_functions;
- unsigned int skip;
-};
-
-static int save_trace(struct stackframe *frame, void *d)
+static void dump_backtrace_entry(unsigned long where, const char *loglvl)
{
- struct stack_trace_data *data = d;
- struct stack_trace *trace = data->trace;
- unsigned long addr = frame->pc;
+ printk("%s %pS\n", loglvl, (void *)where);
+}
- if (data->no_sched_functions && in_sched_functions(addr))
- return 0;
- if (data->skip) {
- data->skip--;
- return 0;
+void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
+ const char *loglvl)
+{
+ struct stackframe frame;
+ int skip = 0;
+
+ pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
+
+ if (regs) {
+ if (user_mode(regs))
+ return;
+ skip = 1;
}
- trace->entries[trace->nr_entries++] = addr;
-
- return trace->nr_entries >= trace->max_entries;
-}
-
-void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
-{
- struct stack_trace_data data;
- struct stackframe frame;
-
- data.trace = trace;
- data.skip = trace->skip;
- data.no_sched_functions = 0;
-
- start_backtrace(&frame, regs->regs[29], regs->pc);
- walk_stackframe(current, &frame, save_trace, &data);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_regs);
-
-static noinline void __save_stack_trace(struct task_struct *tsk,
- struct stack_trace *trace, unsigned int nosched)
-{
- struct stack_trace_data data;
- struct stackframe frame;
+ if (!tsk)
+ tsk = current;
if (!try_get_task_stack(tsk))
return;
- data.trace = trace;
- data.skip = trace->skip;
- data.no_sched_functions = nosched;
-
- if (tsk != current) {
- start_backtrace(&frame, thread_saved_fp(tsk),
- thread_saved_pc(tsk));
- } else {
- /* We don't want this function nor the caller */
- data.skip += 2;
+ if (tsk == current) {
start_backtrace(&frame,
(unsigned long)__builtin_frame_address(0),
- (unsigned long)__save_stack_trace);
+ (unsigned long)dump_backtrace);
+ } else {
+ /*
+ * task blocked in __switch_to
+ */
+ start_backtrace(&frame,
+ thread_saved_fp(tsk),
+ thread_saved_pc(tsk));
}
- walk_stackframe(tsk, &frame, save_trace, &data);
+ printk("%sCall trace:\n", loglvl);
+ do {
+ /* skip until specified stack frame */
+ if (!skip) {
+ dump_backtrace_entry(frame.pc, loglvl);
+ } else if (frame.fp == regs->regs[29]) {
+ skip = 0;
+ /*
+ * Mostly, this is the case where this function is
+ * called in panic/abort. As the exception handler's
+ * stack frame does not contain the corresponding pc
+ * at which an exception has taken place, use regs->pc
+ * instead.
+ */
+ dump_backtrace_entry(regs->pc, loglvl);
+ }
+ } while (!unwind_frame(tsk, &frame));
put_task_stack(tsk);
}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
{
- __save_stack_trace(tsk, trace, 1);
+ dump_backtrace(NULL, tsk, loglvl);
+ barrier();
}
-void save_stack_trace(struct stack_trace *trace)
+#ifdef CONFIG_STACKTRACE
+
+noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task,
+ struct pt_regs *regs)
{
- __save_stack_trace(current, trace, 0);
+ struct stackframe frame;
+
+ if (regs)
+ start_backtrace(&frame, regs->regs[29], regs->pc);
+ else if (task == current)
+ start_backtrace(&frame,
+ (unsigned long)__builtin_frame_address(1),
+ (unsigned long)__builtin_return_address(0));
+ else
+ start_backtrace(&frame, thread_saved_fp(task),
+ thread_saved_pc(task));
+
+ walk_stackframe(task, &frame, consume_entry, cookie);
}
-EXPORT_SYMBOL_GPL(save_stack_trace);
#endif
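
arch_stack_walk() plugs the arm64 unwinder into the generic CONFIG_ARCH_STACKWALK core, using the same bool-returning consumer convention the save_return_addr() hunk adopted earlier: return true to keep unwinding, false to stop. A hedged consumer sketch (names are illustrative):

	/* Record the first traced PC and stop. */
	static bool record_first_entry(void *cookie, unsigned long pc)
	{
		*(unsigned long *)cookie = pc;
		return false;
	}

	static unsigned long first_pc_of(struct task_struct *task)
	{
		unsigned long pc = 0;

		arch_stack_walk(record_first_entry, &pc, task, NULL);
		return pc;
	}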
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 9405d1b..9f8cdec 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -3,13 +3,14 @@
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/pgtable.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/exec.h>
-#include <asm/pgtable.h>
+#include <asm/mte.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/smp_plat.h>
@@ -72,8 +73,10 @@
* have turned the mitigation on. If the user has forcefully
* disabled it, make sure their wishes are obeyed.
*/
- if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
- arm64_set_ssbd_mitigation(false);
+ spectre_v4_enable_mitigation(NULL);
+
+ /* Restore additional MTE-specific configuration */
+ mte_suspend_exit();
}
/*
@@ -117,7 +120,7 @@
if (!ret)
ret = -EOPNOTSUPP;
} else {
- __cpu_suspend_exit();
+ RCU_NONIDLE(__cpu_suspend_exit());
}
unpause_graph_tracing();
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 091c115..befde0e 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -98,12 +98,53 @@
regs->orig_x0 = regs->regs[0];
regs->syscallno = scno;
+ /*
+ * BTI note:
+ * The architecture does not guarantee that SPSR.BTYPE is zero
+ * on taking an SVC, so we could return to userspace with a
+ * non-zero BTYPE after the syscall.
+ *
+ * This shouldn't matter except when userspace is explicitly
+ * doing something stupid, such as setting PROT_BTI on a page
+ * that lacks conforming BTI/PACIxSP instructions, falling
+ * through from one executable page to another with differing
+ * PROT_BTI, or messing with BTYPE via ptrace: in such cases,
+ * userspace should not be surprised if a SIGILL occurs on
+ * syscall return.
+ *
+ * So, don't touch regs->pstate & PSR_BTYPE_MASK here.
+ * (Similarly for HVC and SMC elsewhere.)
+ */
+
cortex_a76_erratum_1463225_svc_handler();
- user_exit_irqoff();
local_daif_restore(DAIF_PROCCTX);
+ if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) {
+ /*
+ * Process the asynchronous tag check fault before the actual
+ * syscall. do_notify_resume() will send a signal to userspace
+ * before the syscall is restarted.
+ */
+ syscall_set_return_value(current, regs, -ERESTARTNOINTR, 0);
+ return;
+ }
+
if (has_syscall_work(flags)) {
- /* set default errno for user-issued syscall(-1) */
+ /*
+ * The de-facto standard way to skip a system call using ptrace
+ * is to set the system call to -1 (NO_SYSCALL) and set x0 to a
+ * suitable error code for consumption by userspace. However,
+ * this cannot be distinguished from a user-issued syscall(-1)
+ * and so we must set x0 to -ENOSYS here in case the tracer doesn't
+ * issue the skip and we fall into trace_exit with x0 preserved.
+ *
+ * This is slightly odd because it also means that if a tracer
+ * sets the system call number to -1 but does not initialise x0,
+ * then x0 will be preserved for all system calls apart from a
+ * user-issued syscall(-1). However, requesting a skip and not
+ * setting the return value is unlikely to do anything sensible
+ * anyway.
+ */
if (scno == NO_SYSCALL)
syscall_set_return_value(current, regs, -ENOSYS, 0);
scno = syscall_trace_enter(regs);
@@ -121,15 +162,8 @@
if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
local_daif_mask();
flags = current_thread_info()->flags;
- if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) {
- /*
- * We're off to userspace, where interrupts are
- * always enabled after we restore the flags from
- * the SPSR.
- */
- trace_hardirqs_on();
+ if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
return;
- }
local_daif_restore(DAIF_PROCCTX);
}
@@ -154,14 +188,14 @@
sve_user_disable();
}
-asmlinkage void el0_svc_handler(struct pt_regs *regs)
+void do_el0_svc(struct pt_regs *regs)
{
sve_user_discard();
el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table);
}
#ifdef CONFIG_COMPAT
-asmlinkage void el0_svc_compat_handler(struct pt_regs *regs)
+void do_el0_svc_compat(struct pt_regs *regs)
{
el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls,
compat_sys_call_table);
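
The long comment in el0_svc_common() documents the tracer convention that the -ENOSYS default exists to support. On arm64 the skip itself is requested through the NT_ARM_SYSTEM_CALL regset; a hedged tracer sketch (error handling elided):

	#include <elf.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>

	/* Skip the pending syscall and let the tracer pick the result. */
	static void skip_syscall(pid_t pid)
	{
		int nr = -1;	/* NO_SYSCALL */
		struct iovec iov = { .iov_base = &nr, .iov_len = sizeof(nr) };

		ptrace(PTRACE_SETREGSET, pid, NT_ARM_SYSTEM_CALL, &iov);
		/* ...then write x0 = -EPERM (or similar) via NT_PRSTATUS. */
	}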
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 0b29464..eebbc8d 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -23,13 +23,14 @@
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/clocksource.h>
-#include <linux/clk-provider.h>
+#include <linux/of_clk.h>
#include <linux/acpi.h>
#include <clocksource/arm_arch_timer.h>
#include <asm/thread_info.h>
#include <asm/stacktrace.h>
+#include <asm/paravirt.h>
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -65,4 +66,6 @@
/* Calibrate the delay loop directly */
lpj_fine = arch_timer_rate / HZ;
+
+ pv_time_init();
}
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 113903d..543c67c 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -14,6 +14,7 @@
#include <linux/acpi.h>
#include <linux/arch_topology.h>
#include <linux/cacheinfo.h>
+#include <linux/cpufreq.h>
#include <linux/init.h>
#include <linux/percpu.h>
@@ -122,4 +123,190 @@
}
#endif
+#ifdef CONFIG_ARM64_AMU_EXTN
+#undef pr_fmt
+#define pr_fmt(fmt) "AMU: " fmt
+
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale);
+static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
+static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
+static cpumask_var_t amu_fie_cpus;
+
+/* Initialize counter reference per-cpu variables for the current CPU */
+void init_cpu_freq_invariance_counters(void)
+{
+ this_cpu_write(arch_core_cycles_prev,
+ read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0));
+ this_cpu_write(arch_const_cycles_prev,
+ read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0));
+}
+
+static int validate_cpu_freq_invariance_counters(int cpu)
+{
+ u64 max_freq_hz, ratio;
+
+ if (!cpu_has_amu_feat(cpu)) {
+ pr_debug("CPU%d: counters are not supported.\n", cpu);
+ return -EINVAL;
+ }
+
+ if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
+ !per_cpu(arch_core_cycles_prev, cpu))) {
+ pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
+ return -EINVAL;
+ }
+
+ /* Convert maximum frequency from KHz to Hz and validate */
+ max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000;
+ if (unlikely(!max_freq_hz)) {
+ pr_debug("CPU%d: invalid maximum frequency.\n", cpu);
+ return -EINVAL;
+ }
+
+ /*
+ * Pre-compute the fixed ratio between the frequency of the constant
+ * counter and the maximum frequency of the CPU.
+ *
+ * const_freq
+ * arch_max_freq_scale = ---------------- * SCHED_CAPACITY_SCALE²
+ * cpuinfo_max_freq
+ *
+ * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE²
+ * in order to ensure a good resolution for arch_max_freq_scale for
+ * very low arch timer frequencies (down to the KHz range which should
+ * be unlikely).
+ */
+ ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT);
+ ratio = div64_u64(ratio, max_freq_hz);
+ if (!ratio) {
+ WARN_ONCE(1, "System timer frequency too low.\n");
+ return -EINVAL;
+ }
+
+ per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio;
+
+ return 0;
+}
+
+static inline bool
+enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+
+ if (!policy) {
+ pr_debug("CPU%d: No cpufreq policy found.\n", cpu);
+ return false;
+ }
+
+ if (cpumask_subset(policy->related_cpus, valid_cpus))
+ cpumask_or(amu_fie_cpus, policy->related_cpus,
+ amu_fie_cpus);
+
+ cpufreq_cpu_put(policy);
+
+ return true;
+}
+
+static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
+#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
+
+static int __init init_amu_fie(void)
+{
+ cpumask_var_t valid_cpus;
+ bool have_policy = false;
+ int ret = 0;
+ int cpu;
+
+ if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL))
+ return -ENOMEM;
+
+ if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto free_valid_mask;
+ }
+
+ for_each_present_cpu(cpu) {
+ if (validate_cpu_freq_invariance_counters(cpu))
+ continue;
+ cpumask_set_cpu(cpu, valid_cpus);
+ have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
+ }
+
+ /*
+ * If we are not restricted by cpufreq policies, we only enable
+ * the use of the AMU feature for FIE if all CPUs support AMU.
+ * Otherwise, enable_policy_freq_counters has already enabled
+ * policy cpus.
+ */
+ if (!have_policy && cpumask_equal(valid_cpus, cpu_present_mask))
+ cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus);
+
+ if (!cpumask_empty(amu_fie_cpus)) {
+ pr_info("CPUs[%*pbl]: counters will be used for FIE.",
+ cpumask_pr_args(amu_fie_cpus));
+ static_branch_enable(&amu_fie_key);
+ }
+
+ /*
+ * If the system is not fully invariant after AMU init, disable
+ * partial use of counters for frequency invariance.
+ */
+ if (!topology_scale_freq_invariant())
+ static_branch_disable(&amu_fie_key);
+
+free_valid_mask:
+ free_cpumask_var(valid_cpus);
+
+ return ret;
+}
+late_initcall_sync(init_amu_fie);
+
+bool arch_freq_counters_available(const struct cpumask *cpus)
+{
+ return amu_freq_invariant() &&
+ cpumask_subset(cpus, amu_fie_cpus);
+}
+
+void topology_scale_freq_tick(void)
+{
+ u64 prev_core_cnt, prev_const_cnt;
+ u64 core_cnt, const_cnt, scale;
+ int cpu = smp_processor_id();
+
+ if (!amu_freq_invariant())
+ return;
+
+ if (!cpumask_test_cpu(cpu, amu_fie_cpus))
+ return;
+
+ const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0);
+ core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
+ prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
+ prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+
+ if (unlikely(core_cnt <= prev_core_cnt ||
+ const_cnt <= prev_const_cnt))
+ goto store_and_exit;
+
+ /*
+ * /\core arch_max_freq_scale
+ * scale = ------- * --------------------
+ * /\const SCHED_CAPACITY_SCALE
+ *
+ * See validate_cpu_freq_invariance_counters() for details on
+ * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
+ */
+ scale = core_cnt - prev_core_cnt;
+ scale *= this_cpu_read(arch_max_freq_scale);
+ scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
+ const_cnt - prev_const_cnt);
+
+ scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
+ this_cpu_write(freq_scale, (unsigned long)scale);
+
+store_and_exit:
+ this_cpu_write(arch_core_cycles_prev, core_cnt);
+ this_cpu_write(arch_const_cycles_prev, const_cnt);
+}
+#endif /* CONFIG_ARM64_AMU_EXTN */
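
As a sanity check on the fixed-point arithmetic above, the boot-time ratio and the per-tick scale can be replayed in isolation. The frequencies below are invented round numbers chosen so the divisions come out exact; only the shifts and the order of operations mirror the kernel code.

/*
 * Standalone replay of validate_cpu_freq_invariance_counters() and
 * topology_scale_freq_tick() with made-up numbers: a 125 MHz constant
 * (arch timer) counter, a 2 GHz max CPU frequency, and a 4 ms tick
 * during which the core ran at 1 GHz.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1ULL << SCHED_CAPACITY_SHIFT)

int main(void)
{
	uint64_t const_freq = 125000000;	/* arch_timer_get_rate()   */
	uint64_t max_freq_hz = 2000000000;	/* cpuinfo_max_freq, in Hz */

	/* Boot time: const_freq / max_freq, scaled by 2^(2 * 10) */
	uint64_t ratio = (const_freq << (2 * SCHED_CAPACITY_SHIFT)) / max_freq_hz;

	/* One tick: counter deltas accumulated over 4 ms */
	uint64_t d_const = 500000;		/* 4 ms at 125 MHz */
	uint64_t d_core  = 4000000;		/* 4 ms at 1 GHz   */

	uint64_t scale = (d_core * ratio >> SCHED_CAPACITY_SHIFT) / d_const;
	if (scale > SCHED_CAPACITY_SCALE)
		scale = SCHED_CAPACITY_SCALE;

	/* Prints 512: the core ran at half its maximum frequency */
	printf("freq_scale = %" PRIu64 "\n", scale);
	return 0;
}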
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 4e3e9d9..2059d8f 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -34,7 +34,10 @@
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
+#include <asm/exception.h>
+#include <asm/extable.h>
#include <asm/insn.h>
+#include <asm/kprobes.h>
#include <asm/traps.h>
#include <asm/smp.h>
#include <asm/stack_pointer.h>
@@ -52,11 +55,6 @@
int show_unhandled_signals = 0;
-static void dump_backtrace_entry(unsigned long where)
-{
- printk(" %pS\n", (void *)where);
-}
-
static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
{
unsigned long addr = instruction_pointer(regs);
@@ -82,70 +80,14 @@
printk("%sCode: %s\n", lvl, str);
}
-void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
-{
- struct stackframe frame;
- int skip = 0;
-
- pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
-
- if (regs) {
- if (user_mode(regs))
- return;
- skip = 1;
- }
-
- if (!tsk)
- tsk = current;
-
- if (!try_get_task_stack(tsk))
- return;
-
- if (tsk == current) {
- start_backtrace(&frame,
- (unsigned long)__builtin_frame_address(0),
- (unsigned long)dump_backtrace);
- } else {
- /*
- * task blocked in __switch_to
- */
- start_backtrace(&frame,
- thread_saved_fp(tsk),
- thread_saved_pc(tsk));
- }
-
- printk("Call trace:\n");
- do {
- /* skip until specified stack frame */
- if (!skip) {
- dump_backtrace_entry(frame.pc);
- } else if (frame.fp == regs->regs[29]) {
- skip = 0;
- /*
- * Mostly, this is the case where this function is
- * called in panic/abort. As exception handler's
- * stack frame does not contain the corresponding pc
- * at which an exception has taken place, use regs->pc
- * instead.
- */
- dump_backtrace_entry(regs->pc);
- }
- } while (!unwind_frame(tsk, &frame));
-
- put_task_stack(tsk);
-}
-
-void show_stack(struct task_struct *tsk, unsigned long *sp)
-{
- dump_backtrace(NULL, tsk);
- barrier();
-}
-
#ifdef CONFIG_PREEMPT
#define S_PREEMPT " PREEMPT"
+#elif defined(CONFIG_PREEMPT_RT)
+#define S_PREEMPT " PREEMPT_RT"
#else
#define S_PREEMPT ""
#endif
+
#define S_SMP " SMP"
static int __die(const char *str, int err, struct pt_regs *regs)
@@ -195,9 +137,9 @@
oops_exit();
if (in_interrupt())
- panic("Fatal exception in interrupt");
+ panic("%s: Fatal exception in interrupt", str);
if (panic_on_oops)
- panic("Fatal exception");
+ panic("%s: Fatal exception", str);
raw_spin_unlock_irqrestore(&die_lock, flags);
@@ -268,6 +210,61 @@
}
}
+#ifdef CONFIG_COMPAT
+#define PSTATE_IT_1_0_SHIFT 25
+#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT)
+#define PSTATE_IT_7_2_SHIFT 10
+#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT)
+
+static u32 compat_get_it_state(struct pt_regs *regs)
+{
+ u32 it, pstate = regs->pstate;
+
+ it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT;
+ it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2;
+
+ return it;
+}
+
+static void compat_set_it_state(struct pt_regs *regs, u32 it)
+{
+ u32 pstate_it;
+
+ pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK;
+ pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK;
+
+ regs->pstate &= ~PSR_AA32_IT_MASK;
+ regs->pstate |= pstate_it;
+}
+
+static void advance_itstate(struct pt_regs *regs)
+{
+ u32 it;
+
+ /* ARM mode */
+ if (!(regs->pstate & PSR_AA32_T_BIT) ||
+ !(regs->pstate & PSR_AA32_IT_MASK))
+ return;
+
+ it = compat_get_it_state(regs);
+
+ /*
+ * If this is the last instruction of the block, wipe the IT
+ * state. Otherwise advance it.
+ */
+ if (!(it & 7))
+ it = 0;
+ else
+ it = (it & 0xe0) | ((it << 1) & 0x1f);
+
+ compat_set_it_state(regs, it);
+}
+#else
+static void advance_itstate(struct pt_regs *regs)
+{
+}
+#endif
+
void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size)
{
regs->pc += size;
@@ -278,6 +275,11 @@
*/
if (user_mode(regs))
user_fastforward_single_step(current);
+
+ if (compat_user_mode(regs))
+ advance_itstate(regs);
+ else
+ regs->pstate &= ~PSR_BTYPE_MASK;
}
static LIST_HEAD(undef_hook);
@@ -311,7 +313,7 @@
if (!user_mode(regs)) {
__le32 instr_le;
- if (probe_kernel_address((__force __le32 *)pc, instr_le))
+ if (get_kernel_nofault(instr_le, (__force __le32 *)pc))
goto exit;
instr = le32_to_cpu(instr_le);
} else if (compat_thumb_mode(regs)) {
@@ -347,7 +349,7 @@
return fn ? fn(regs, instr) : 1;
}
-void force_signal_inject(int signal, int code, unsigned long address)
+void force_signal_inject(int signal, int code, unsigned long address, unsigned int err)
{
const char *desc;
struct pt_regs *regs = current_pt_regs();
@@ -373,7 +375,7 @@
signal = SIGKILL;
}
- arm64_notify_die(desc, regs, signal, code, (void __user *)address, 0);
+ arm64_notify_die(desc, regs, signal, code, (void __user *)address, err);
}
/*
@@ -383,17 +385,17 @@
{
int code;
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
if (find_vma(current->mm, addr) == NULL)
code = SEGV_MAPERR;
else
code = SEGV_ACCERR;
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
- force_signal_inject(SIGSEGV, code, addr);
+ force_signal_inject(SIGSEGV, code, addr, 0);
}
-asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+void do_undefinstr(struct pt_regs *regs)
{
/* check for AArch32 breakpoint instructions */
if (!aarch32_break_handler(regs))
@@ -403,8 +405,27 @@
return;
BUG_ON(!user_mode(regs));
- force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
}
+NOKPROBE_SYMBOL(do_undefinstr);
+
+void do_bti(struct pt_regs *regs)
+{
+ BUG_ON(!user_mode(regs));
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+}
+NOKPROBE_SYMBOL(do_bti);
+
+void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr)
+{
+ /*
+ * Unexpected FPAC exception or pointer authentication failure in
+ * the kernel: kill the task before it does any more harm.
+ */
+ BUG_ON(!user_mode(regs));
+ force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr);
+}
+NOKPROBE_SYMBOL(do_ptrauth_fault);
#define __user_cache_maint(insn, address, res) \
if (address >= user_addr_max()) { \
@@ -455,7 +476,7 @@
__user_cache_maint("ic ivau", address, ret);
break;
default:
- force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}
@@ -508,7 +529,7 @@
sysreg = esr_sys64_to_sysreg(esr);
if (do_emulate_mrs(regs, sysreg, rt) != 0)
- force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
}
static void wfi_handler(unsigned int esr, struct pt_regs *regs)
@@ -561,34 +582,7 @@
{},
};
-
#ifdef CONFIG_COMPAT
-#define PSTATE_IT_1_0_SHIFT 25
-#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT)
-#define PSTATE_IT_7_2_SHIFT 10
-#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT)
-
-static u32 compat_get_it_state(struct pt_regs *regs)
-{
- u32 it, pstate = regs->pstate;
-
- it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT;
- it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2;
-
- return it;
-}
-
-static void compat_set_it_state(struct pt_regs *regs, u32 it)
-{
- u32 pstate_it;
-
- pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK;
- pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK;
-
- regs->pstate &= ~PSR_AA32_IT_MASK;
- regs->pstate |= pstate_it;
-}
-
static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs)
{
int cond;
@@ -609,42 +603,12 @@
return aarch32_opcode_cond_checks[cond](regs->pstate);
}
-static void advance_itstate(struct pt_regs *regs)
-{
- u32 it;
-
- /* ARM mode */
- if (!(regs->pstate & PSR_AA32_T_BIT) ||
- !(regs->pstate & PSR_AA32_IT_MASK))
- return;
-
- it = compat_get_it_state(regs);
-
- /*
- * If this is the last instruction of the block, wipe the IT
- * state. Otherwise advance it.
- */
- if (!(it & 7))
- it = 0;
- else
- it = (it & 0xe0) | ((it << 1) & 0x1f);
-
- compat_set_it_state(regs, it);
-}
-
-static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs,
- unsigned int sz)
-{
- advance_itstate(regs);
- arm64_skip_faulting_instruction(regs, sz);
-}
-
static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
{
int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT;
pt_regs_write_reg(regs, reg, arch_timer_get_rate());
- arm64_compat_skip_faulting_instruction(regs, 4);
+ arm64_skip_faulting_instruction(regs, 4);
}
static const struct sys64_hook cp15_32_hooks[] = {
@@ -664,7 +628,7 @@
pt_regs_write_reg(regs, rt, lower_32_bits(val));
pt_regs_write_reg(regs, rt2, upper_32_bits(val));
- arm64_compat_skip_faulting_instruction(regs, 4);
+ arm64_skip_faulting_instruction(regs, 4);
}
static const struct sys64_hook cp15_64_hooks[] = {
@@ -676,7 +640,7 @@
{},
};
-asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs)
+void do_cp15instr(unsigned int esr, struct pt_regs *regs)
{
const struct sys64_hook *hook, *hook_base;
@@ -685,7 +649,7 @@
* There is no T16 variant of a CP access, so we
* always advance PC by 4 bytes.
*/
- arm64_compat_skip_faulting_instruction(regs, 4);
+ arm64_skip_faulting_instruction(regs, 4);
return;
}
@@ -714,9 +678,10 @@
*/
do_undefinstr(regs);
}
+NOKPROBE_SYMBOL(do_cp15instr);
#endif
-asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
+void do_sysinstr(unsigned int esr, struct pt_regs *regs)
{
const struct sys64_hook *hook;
@@ -733,6 +698,7 @@
*/
do_undefinstr(regs);
}
+NOKPROBE_SYMBOL(do_sysinstr);
static const char *esr_class_str[] = {
[0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC",
@@ -746,6 +712,7 @@
[ESR_ELx_EC_CP10_ID] = "CP10 MRC/VMRS",
[ESR_ELx_EC_PAC] = "PAC",
[ESR_ELx_EC_CP14_64] = "CP14 MCRR/MRRC",
+ [ESR_ELx_EC_BTI] = "BTI",
[ESR_ELx_EC_ILL] = "PSTATE.IL",
[ESR_ELx_EC_SVC32] = "SVC (AArch32)",
[ESR_ELx_EC_HVC32] = "HVC (AArch32)",
@@ -756,6 +723,7 @@
[ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)",
[ESR_ELx_EC_SVE] = "SVE",
[ESR_ELx_EC_ERET] = "ERET/ERETAA/ERETAB",
+ [ESR_ELx_EC_FPAC] = "FPAC",
[ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF",
[ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)",
[ESR_ELx_EC_IABT_CUR] = "IABT (current EL)",
@@ -786,14 +754,17 @@
* bad_mode handles the impossible case in the exception vector. This is always
* fatal.
*/
-asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
+asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
{
+ arm64_enter_nmi(regs);
+
console_verbose();
pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
handler[reason], smp_processor_id(), esr,
esr_get_class_string(esr));
+ __show_regs(regs);
local_daif_mask();
panic("bad mode");
}
@@ -802,7 +773,7 @@
* bad_el0_sync handles unexpected, but potentially recoverable synchronous
* exceptions taken from EL0. Unlike bad_mode, this returns.
*/
-asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
{
void __user *pc = (void __user *)instruction_pointer(regs);
@@ -818,7 +789,7 @@
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
-asmlinkage void handle_bad_stack(struct pt_regs *regs)
+asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
{
unsigned long tsk_stk = (unsigned long)current->stack;
unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
@@ -826,6 +797,8 @@
unsigned int esr = read_sysreg(esr_el1);
unsigned long far = read_sysreg(far_el1);
+ arm64_enter_nmi(regs);
+
console_verbose();
pr_emerg("Insufficient stack space to handle exception!");
@@ -835,7 +808,7 @@
pr_emerg("Task stack: [0x%016lx..0x%016lx]\n",
tsk_stk, tsk_stk + THREAD_SIZE);
pr_emerg("IRQ stack: [0x%016lx..0x%016lx]\n",
- irq_stk, irq_stk + THREAD_SIZE);
+ irq_stk, irq_stk + IRQ_STACK_SIZE);
pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n",
ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE);
@@ -897,46 +870,15 @@
}
}
-asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr)
{
- const bool was_in_nmi = in_nmi();
-
- if (!was_in_nmi)
- nmi_enter();
+ arm64_enter_nmi(regs);
/* non-RAS errors are not containable */
if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
arm64_serror_panic(regs, esr);
- if (!was_in_nmi)
- nmi_exit();
-}
-
-asmlinkage void enter_from_user_mode(void)
-{
- CT_WARN_ON(ct_state() != CONTEXT_USER);
- user_exit_irqoff();
-}
-NOKPROBE_SYMBOL(enter_from_user_mode);
-
-void __pte_error(const char *file, int line, unsigned long val)
-{
- pr_err("%s:%d: bad pte %016lx.\n", file, line, val);
-}
-
-void __pmd_error(const char *file, int line, unsigned long val)
-{
- pr_err("%s:%d: bad pmd %016lx.\n", file, line, val);
-}
-
-void __pud_error(const char *file, int line, unsigned long val)
-{
- pr_err("%s:%d: bad pud %016lx.\n", file, line, val);
-}
-
-void __pgd_error(const char *file, int line, unsigned long val)
-{
- pr_err("%s:%d: bad pgd %016lx.\n", file, line, val);
+ arm64_exit_nmi(regs);
}
/* GENERIC_BUG traps */
@@ -978,6 +920,21 @@
.imm = BUG_BRK_IMM,
};
+static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr)
+{
+ pr_err("%s generated an invalid instruction at %pS!\n",
+ in_bpf_jit(regs) ? "BPF JIT" : "Kernel text patching",
+ (void *)instruction_pointer(regs));
+
+ /* We cannot handle this */
+ return DBG_HOOK_ERROR;
+}
+
+static struct break_hook fault_break_hook = {
+ .fn = reserved_fault_handler,
+ .imm = FAULT_BRK_IMM,
+};
+
#ifdef CONFIG_KASAN_SW_TAGS
#define KASAN_ESR_RECOVER 0x20
@@ -1040,11 +997,12 @@
return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
}
-/* This registration must happen early, before debug_traps_init(). */
void __init trap_init(void)
{
register_kernel_break_hook(&bug_break_hook);
+ register_kernel_break_hook(&fault_break_hook);
#ifdef CONFIG_KASAN_SW_TAGS
register_kernel_break_hook(&kasan_break_hook);
#endif
+ debug_traps_init();
}
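
The mask rotation in advance_itstate() above is easier to see in isolation. The starting value 0x0a is an illustrative ITSTATE (condition bits clear, a three-instruction mask), not a value taken from the patch.

/* Standalone replay of the advance_itstate() bit manipulation. */
#include <stdint.h>
#include <stdio.h>

static uint32_t advance(uint32_t it)
{
	if (!(it & 7))		/* last instruction: wipe the IT state */
		return 0;
	return (it & 0xe0) | ((it << 1) & 0x1f);
}

int main(void)
{
	uint32_t it = 0x0a;	/* example: cond bits 0, mask 0b01010 */

	do {
		printf("ITSTATE = 0x%02x\n", it);
		it = advance(it);
	} while (it);		/* prints 0x0a, 0x14, 0x08 */
	printf("ITSTATE = 0x00 (IT block finished)\n");
	return 0;
}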
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 033a48f..debb899 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
+#include <linux/time_namespace.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
#include <vdso/datapage.h>
@@ -29,24 +30,20 @@
#include <asm/vdso.h>
extern char vdso_start[], vdso_end[];
-#ifdef CONFIG_COMPAT_VDSO
extern char vdso32_start[], vdso32_end[];
-#endif /* CONFIG_COMPAT_VDSO */
-/* vdso_lookup arch_index */
-enum arch_vdso_type {
- ARM64_VDSO = 0,
-#ifdef CONFIG_COMPAT_VDSO
- ARM64_VDSO32 = 1,
-#endif /* CONFIG_COMPAT_VDSO */
+enum vdso_abi {
+ VDSO_ABI_AA64,
+ VDSO_ABI_AA32,
};
-#ifdef CONFIG_COMPAT_VDSO
-#define VDSO_TYPES (ARM64_VDSO32 + 1)
-#else
-#define VDSO_TYPES (ARM64_VDSO + 1)
-#endif /* CONFIG_COMPAT_VDSO */
-struct __vdso_abi {
+enum vvar_pages {
+ VVAR_DATA_PAGE_OFFSET,
+ VVAR_TIMENS_PAGE_OFFSET,
+ VVAR_NR_PAGES,
+};
+
+struct vdso_abi_info {
const char *name;
const char *vdso_code_start;
const char *vdso_code_end;
@@ -57,14 +54,14 @@
struct vm_special_mapping *cm;
};
-static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = {
- {
+static struct vdso_abi_info vdso_info[] __ro_after_init = {
+ [VDSO_ABI_AA64] = {
.name = "vdso",
.vdso_code_start = vdso_start,
.vdso_code_end = vdso_end,
},
#ifdef CONFIG_COMPAT_VDSO
- {
+ [VDSO_ABI_AA32] = {
.name = "vdso32",
.vdso_code_start = vdso32_start,
.vdso_code_end = vdso32_end,
@@ -81,13 +78,13 @@
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;
-static int __vdso_remap(enum arch_vdso_type arch_index,
+static int __vdso_remap(enum vdso_abi abi,
const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
- unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end -
- vdso_lookup[arch_index].vdso_code_start;
+ unsigned long vdso_size = vdso_info[abi].vdso_code_end -
+ vdso_info[abi].vdso_code_start;
if (vdso_size != new_size)
return -EINVAL;
@@ -97,56 +94,156 @@
return 0;
}
-static int __vdso_init(enum arch_vdso_type arch_index)
+static int __vdso_init(enum vdso_abi abi)
{
int i;
struct page **vdso_pagelist;
unsigned long pfn;
- if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) {
+ if (memcmp(vdso_info[abi].vdso_code_start, "\177ELF", 4)) {
pr_err("vDSO is not a valid ELF object!\n");
return -EINVAL;
}
- vdso_lookup[arch_index].vdso_pages = (
- vdso_lookup[arch_index].vdso_code_end -
- vdso_lookup[arch_index].vdso_code_start) >>
+ vdso_info[abi].vdso_pages = (
+ vdso_info[abi].vdso_code_end -
+ vdso_info[abi].vdso_code_start) >>
PAGE_SHIFT;
- /* Allocate the vDSO pagelist, plus a page for the data. */
- vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1,
+ vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
sizeof(struct page *),
GFP_KERNEL);
if (vdso_pagelist == NULL)
return -ENOMEM;
- /* Grab the vDSO data page. */
- vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
-
-
/* Grab the vDSO code pages. */
- pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start);
+ pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);
- for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++)
- vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+ for (i = 0; i < vdso_info[abi].vdso_pages; i++)
+ vdso_pagelist[i] = pfn_to_page(pfn + i);
- vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0];
- vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1];
+ vdso_info[abi].cm->pages = vdso_pagelist;
return 0;
}
-static int __setup_additional_pages(enum arch_vdso_type arch_index,
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+ return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
+
+ mmap_read_lock(mm);
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ unsigned long size = vma->vm_end - vma->vm_start;
+
+ if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
+ zap_page_range(vma, vma->vm_start, size);
+#ifdef CONFIG_COMPAT_VDSO
+ if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
+ zap_page_range(vma, vma->vm_start, size);
+#endif
+ }
+
+ mmap_read_unlock(mm);
+ return 0;
+}
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ if (likely(vma->vm_mm == current->mm))
+ return current->nsproxy->time_ns->vvar_page;
+
+ /*
+ * VM_PFNMAP | VM_IO protect .fault() handler from being called
+ * through interfaces like /proc/$pid/mem or
+ * process_vm_{readv,writev}() as long as there's no .access()
+ * in special_mapping_vmops.
+ * For more details check_vma_flags() and __access_remote_vm()
+ */
+ WARN(1, "vvar_page accessed remotely");
+
+ return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page *timens_page = find_timens_vvar_page(vma);
+ unsigned long pfn;
+
+ switch (vmf->pgoff) {
+ case VVAR_DATA_PAGE_OFFSET:
+ if (timens_page)
+ pfn = page_to_pfn(timens_page);
+ else
+ pfn = sym_to_pfn(vdso_data);
+ break;
+#ifdef CONFIG_TIME_NS
+ case VVAR_TIMENS_PAGE_OFFSET:
+ /*
+ * If a task belongs to a time namespace then a namespace
+ * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+ * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+ * offset.
+ * See also the comment near timens_setup_vdso_data().
+ */
+ if (!timens_page)
+ return VM_FAULT_SIGBUS;
+ pfn = sym_to_pfn(vdso_data);
+ break;
+#endif /* CONFIG_TIME_NS */
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+
+ return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+static int vvar_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+
+ if (new_size != VVAR_NR_PAGES * PAGE_SIZE)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __setup_additional_pages(enum vdso_abi abi,
struct mm_struct *mm,
struct linux_binprm *bprm,
int uses_interp)
{
unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+ unsigned long gp_flags = 0;
void *ret;
- vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT;
+ BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+
+ vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
/* Be sure to map the data page */
- vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+ vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
@@ -154,18 +251,21 @@
goto up_fail;
}
- ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
- VM_READ|VM_MAYREAD,
- vdso_lookup[arch_index].dm);
+ ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
+ VM_READ|VM_MAYREAD|VM_PFNMAP,
+ vdso_info[abi].dm);
if (IS_ERR(ret))
goto up_fail;
- vdso_base += PAGE_SIZE;
+ if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
+ gp_flags = VM_ARM64_BTI;
+
+ vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
mm->context.vdso = (void *)vdso_base;
ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
- VM_READ|VM_EXEC|
+ VM_READ|VM_EXEC|gp_flags|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso_lookup[arch_index].cm);
+ vdso_info[abi].cm);
if (IS_ERR(ret))
goto up_fail;
@@ -180,52 +280,40 @@
/*
* Create and map the vectors page for AArch32 tasks.
*/
-#ifdef CONFIG_COMPAT_VDSO
static int aarch32_vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
- return __vdso_remap(ARM64_VDSO32, sm, new_vma);
+ return __vdso_remap(VDSO_ABI_AA32, sm, new_vma);
}
-#endif /* CONFIG_COMPAT_VDSO */
-/*
- * aarch32_vdso_pages:
- * 0 - kuser helpers
- * 1 - sigreturn code
- * or (CONFIG_COMPAT_VDSO):
- * 0 - kuser helpers
- * 1 - vdso data
- * 2 - vdso code
- */
-#define C_VECTORS 0
-#ifdef CONFIG_COMPAT_VDSO
-#define C_VVAR 1
-#define C_VDSO 2
-#define C_PAGES (C_VDSO + 1)
-#else
-#define C_SIGPAGE 1
-#define C_PAGES (C_SIGPAGE + 1)
-#endif /* CONFIG_COMPAT_VDSO */
-static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init;
-static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = {
- {
+enum aarch32_map {
+ AA32_MAP_VECTORS, /* kuser helpers */
+ AA32_MAP_SIGPAGE,
+ AA32_MAP_VVAR,
+ AA32_MAP_VDSO,
+};
+
+static struct page *aarch32_vectors_page __ro_after_init;
+static struct page *aarch32_sig_page __ro_after_init;
+
+static struct vm_special_mapping aarch32_vdso_maps[] = {
+ [AA32_MAP_VECTORS] = {
.name = "[vectors]", /* ABI */
- .pages = &aarch32_vdso_pages[C_VECTORS],
+ .pages = &aarch32_vectors_page,
},
-#ifdef CONFIG_COMPAT_VDSO
- {
+ [AA32_MAP_SIGPAGE] = {
+ .name = "[sigpage]", /* ABI */
+ .pages = &aarch32_sig_page,
+ },
+ [AA32_MAP_VVAR] = {
.name = "[vvar]",
+ .fault = vvar_fault,
+ .mremap = vvar_mremap,
},
- {
+ [AA32_MAP_VDSO] = {
.name = "[vdso]",
.mremap = aarch32_vdso_mremap,
},
-#else
- {
- .name = "[sigpage]", /* ABI */
- .pages = &aarch32_vdso_pages[C_SIGPAGE],
- },
-#endif /* CONFIG_COMPAT_VDSO */
};
static int aarch32_alloc_kuser_vdso_page(void)
@@ -243,52 +331,52 @@
memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
kuser_sz);
- aarch32_vdso_pages[C_VECTORS] = virt_to_page(vdso_page);
- flush_dcache_page(aarch32_vdso_pages[C_VECTORS]);
+ aarch32_vectors_page = virt_to_page(vdso_page);
+ flush_dcache_page(aarch32_vectors_page);
return 0;
}
-#ifdef CONFIG_COMPAT_VDSO
-static int __aarch32_alloc_vdso_pages(void)
-{
- int ret;
-
- vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR];
- vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO];
-
- ret = __vdso_init(ARM64_VDSO32);
- if (ret)
- return ret;
-
- return aarch32_alloc_kuser_vdso_page();
-}
-#else
-static int __aarch32_alloc_vdso_pages(void)
+static int aarch32_alloc_sigpage(void)
{
extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
unsigned long sigpage;
- int ret;
sigpage = get_zeroed_page(GFP_ATOMIC);
if (!sigpage)
return -ENOMEM;
memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz);
- aarch32_vdso_pages[C_SIGPAGE] = virt_to_page(sigpage);
- flush_dcache_page(aarch32_vdso_pages[C_SIGPAGE]);
-
- ret = aarch32_alloc_kuser_vdso_page();
- if (ret)
- free_page(sigpage);
-
- return ret;
+ aarch32_sig_page = virt_to_page(sigpage);
+ flush_dcache_page(aarch32_sig_page);
+ return 0;
}
-#endif /* CONFIG_COMPAT_VDSO */
+
+static int __aarch32_alloc_vdso_pages(void)
+{
+
+ if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
+ return 0;
+
+ vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR];
+ vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO];
+
+ return __vdso_init(VDSO_ABI_AA32);
+}
static int __init aarch32_alloc_vdso_pages(void)
{
- return __aarch32_alloc_vdso_pages();
+ int ret;
+
+ ret = __aarch32_alloc_vdso_pages();
+ if (ret)
+ return ret;
+
+ ret = aarch32_alloc_sigpage();
+ if (ret)
+ return ret;
+
+ return aarch32_alloc_kuser_vdso_page();
}
arch_initcall(aarch32_alloc_vdso_pages);
@@ -306,12 +394,11 @@
ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYEXEC,
- &aarch32_vdso_spec[C_VECTORS]);
+ &aarch32_vdso_maps[AA32_MAP_VECTORS]);
return PTR_ERR_OR_ZERO(ret);
}
-#ifndef CONFIG_COMPAT_VDSO
static int aarch32_sigreturn_setup(struct mm_struct *mm)
{
unsigned long addr;
@@ -330,40 +417,38 @@
ret = _install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ | VM_EXEC | VM_MAYREAD |
VM_MAYWRITE | VM_MAYEXEC,
- &aarch32_vdso_spec[C_SIGPAGE]);
+ &aarch32_vdso_maps[AA32_MAP_SIGPAGE]);
if (IS_ERR(ret))
goto out;
- mm->context.vdso = (void *)addr;
+ mm->context.sigpage = (void *)addr;
out:
return PTR_ERR_OR_ZERO(ret);
}
-#endif /* !CONFIG_COMPAT_VDSO */
int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
int ret;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
ret = aarch32_kuser_helpers_setup(mm);
if (ret)
goto out;
-#ifdef CONFIG_COMPAT_VDSO
- ret = __setup_additional_pages(ARM64_VDSO32,
- mm,
- bprm,
- uses_interp);
-#else
- ret = aarch32_sigreturn_setup(mm);
-#endif /* CONFIG_COMPAT_VDSO */
+ if (IS_ENABLED(CONFIG_COMPAT_VDSO)) {
+ ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm,
+ uses_interp);
+ if (ret)
+ goto out;
+ }
+ ret = aarch32_sigreturn_setup(mm);
out:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return ret;
}
#endif /* CONFIG_COMPAT */
@@ -371,22 +456,21 @@
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
- return __vdso_remap(ARM64_VDSO, sm, new_vma);
+ return __vdso_remap(VDSO_ABI_AA64, sm, new_vma);
}
-/*
- * aarch64_vdso_pages:
- * 0 - vvar
- * 1 - vdso
- */
-#define A_VVAR 0
-#define A_VDSO 1
-#define A_PAGES (A_VDSO + 1)
-static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = {
- {
+enum aarch64_map {
+ AA64_MAP_VVAR,
+ AA64_MAP_VDSO,
+};
+
+static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
+ [AA64_MAP_VVAR] = {
.name = "[vvar]",
+ .fault = vvar_fault,
+ .mremap = vvar_mremap,
},
- {
+ [AA64_MAP_VDSO] = {
.name = "[vdso]",
.mremap = vdso_mremap,
},
@@ -394,28 +478,23 @@
static int __init vdso_init(void)
{
- vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR];
- vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO];
+ vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR];
+ vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO];
- return __vdso_init(ARM64_VDSO);
+ return __vdso_init(VDSO_ABI_AA64);
}
arch_initcall(vdso_init);
-int arch_setup_additional_pages(struct linux_binprm *bprm,
- int uses_interp)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
int ret;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
- ret = __setup_additional_pages(ARM64_VDSO,
- mm,
- bprm,
- uses_interp);
-
- up_write(&mm->mmap_sem);
+ ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp);
+ mmap_write_unlock(mm);
return ret;
}
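
The net effect on a process's address space can be observed directly: with this patch, [vvar] spans VVAR_NR_PAGES (two) pages immediately below [vdso]. A throwaway check follows; the mapping names are ABI, everything else here is illustrative.

/* Print this process's [vvar] and [vdso] mappings. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[512];
	FILE *f = fopen("/proc/self/maps", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "[vvar]") || strstr(line, "[vdso]"))
			fputs(line, stdout);
	fclose(f);
	return 0;
}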
diff --git a/arch/arm64/kernel/vdso/.gitignore b/arch/arm64/kernel/vdso/.gitignore
index f8b69d8..652e31d 100644
--- a/arch/arm64/kernel/vdso/.gitignore
+++ b/arch/arm64/kernel/vdso/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
vdso.lds
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 3862cad..d65f522 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -17,16 +17,20 @@
targets := $(obj-vdso) vdso.so vdso.so.dbg
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
-ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
- --build-id -n -T
+btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti
+
+# -Bsymbolic has been added for consistency with arm, the compat vDSO and
+# potential future proofing if we end up with internal calls to the exported
+# routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so
+# preparation in build-time C")).
+ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
+ -Bsymbolic $(call ld-option, --no-eh-frame-hdr) --build-id=sha1 -n \
+ $(btildflags-y) -T
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING
-VDSO_LDFLAGS := -Bsymbolic
-
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
-KBUILD_CFLAGS += $(DISABLE_LTO)
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
OBJECT_FILES_NON_STANDARD := y
@@ -38,18 +42,11 @@
CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
endif
-# Clang versions less than 8 do not support -mcmodel=tiny
-ifeq ($(CONFIG_CC_IS_CLANG), y)
- ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0)
- CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny
- endif
-endif
-
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
obj-y += vdso.o
-extra-y += vdso.lds
+targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
# Force dependency (incbin is bad)
diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
index 0664aca..8b806ea 100755
--- a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
+++ b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
@@ -8,7 +8,7 @@
# Doing this inside the Makefile will break the $(filter-out) function,
# causing Kbuild to rebuild the vdso-offsets header file every time.
#
-# Author: Will Deacon <will.deacon@arm.com
+# Author: Will Deacon <will.deacon@arm.com>
#
LC_ALL=C
diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S
index 0ce6ec7..3d4e822 100644
--- a/arch/arm64/kernel/vdso/note.S
+++ b/arch/arm64/kernel/vdso/note.S
@@ -12,9 +12,12 @@
#include <linux/version.h>
#include <linux/elfnote.h>
#include <linux/build-salt.h>
+#include <asm/assembler.h>
ELFNOTE_START(Linux, 0, "a")
.long LINUX_VERSION_CODE
ELFNOTE_END
BUILD_SALT
+
+emit_aarch64_feature_1_and
diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S
index 0723aa3..0e18729 100644
--- a/arch/arm64/kernel/vdso/sigreturn.S
+++ b/arch/arm64/kernel/vdso/sigreturn.S
@@ -1,7 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Sigreturn trampoline for returning from a signal when the SA_RESTORER
- * flag is not set.
+ * flag is not set. It serves primarily as a hall of shame for crappy
+ * unwinders and features an exciting but mysterious NOP instruction.
+ *
+ * It's also fragile as hell, so please think twice before changing anything
+ * in here.
*
* Copyright (C) 2012 ARM Limited
*
@@ -9,18 +13,68 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/unistd.h>
.text
- nop
-ENTRY(__kernel_rt_sigreturn)
- .cfi_startproc
- .cfi_signal_frame
- .cfi_def_cfa x29, 0
- .cfi_offset x29, 0 * 8
- .cfi_offset x30, 1 * 8
+/*
+ * NOTE!!! You may notice that all of the .cfi directives in this file have
+ * been commented out. This is because they have been shown to trigger segfaults
+ * in libgcc when unwinding out of a SIGCANCEL handler to invoke pthread
+ * cleanup handlers during the thread cancellation dance. By omitting the
+ * directives, we trigger an arm64-specific fallback path in the unwinder which
+ * recognises the signal frame and restores many of the registers directly from
+ * the sigcontext. Re-enabling the cfi directives here therefore needs to be
+ * much more comprehensive to reduce the risk of further regressions.
+ */
+
+/* Ensure that the mysterious NOP can be associated with a function. */
+// .cfi_startproc
+
+/*
+ * .cfi_signal_frame causes the corresponding Frame Description Entry (FDE) in
+ * the .eh_frame section to be annotated as a signal frame. This allows DWARF
+ * unwinders (e.g. libstdc++) to implement _Unwind_GetIPInfo() and identify
+ * the next frame using the unmodified return address instead of subtracting 1,
+ * which may yield the wrong FDE.
+ */
+// .cfi_signal_frame
+
+/*
+ * Tell the unwinder where to locate the frame record linking back to the
+ * interrupted context. We don't provide unwind info for registers other than
+ * the frame pointer and the link register here; in practice, this is likely to
+ * be insufficient for unwinding in C/C++ based runtimes, especially without a
+ * means to restore the stack pointer. Thankfully, unwinders and debuggers
+ * already have baked-in strategies for attempting to unwind out of signals.
+ */
+// .cfi_def_cfa x29, 0
+// .cfi_offset x29, 0 * 8
+// .cfi_offset x30, 1 * 8
+
+/*
+ * This mysterious NOP is required for some unwinders (e.g. libc++) that
+ * unconditionally subtract one from the result of _Unwind_GetIP() in order to
+ * identify the calling function.
+ * Hack borrowed from arch/powerpc/kernel/vdso64/sigtramp.S.
+ */
+ nop // Mysterious NOP
+
+/*
+ * GDB, libgcc and libunwind rely on being able to identify the sigreturn
+ * instruction sequence to unwind from signal handlers. We cannot, therefore,
+ * use SYM_FUNC_START() here, as it will emit a BTI C instruction and break the
+ * unwinder. Thankfully, this function is only ever called from a RET and so
+ * omitting the landing pad is perfectly fine.
+ */
+SYM_CODE_START(__kernel_rt_sigreturn)
+// PLEASE DO NOT MODIFY
mov x8, #__NR_rt_sigreturn
+// PLEASE DO NOT MODIFY
svc #0
- .cfi_endproc
-ENDPROC(__kernel_rt_sigreturn)
+// PLEASE DO NOT MODIFY
+// .cfi_endproc
+SYM_CODE_END(__kernel_rt_sigreturn)
+
+emit_aarch64_feature_1_and
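
The "return address minus one" heuristic that the mysterious NOP defends against can be modelled in a few lines; the addresses below are hypothetical, not the real vDSO layout.

/*
 * Unwinders map a return address to its caller by looking up
 * (address - 1), because a normal return address points just past a
 * call instruction. A signal frame's "return address" is the first
 * instruction of the trampoline itself, so without a padding NOP the
 * lookup lands in whatever precedes the trampoline.
 */
#include <stdio.h>

static const unsigned long tramp_start = 0x1000;	/* hypothetical */

static void classify(const char *label, unsigned long entry)
{
	unsigned long lookup = entry - 1;	/* _Unwind_GetIP() - 1 */

	printf("%s: lookup 0x%lx falls %s the trampoline\n", label,
	       lookup, lookup >= tramp_start ? "inside" : "before");
}

int main(void)
{
	classify("without NOP", tramp_start);		/* entry == start */
	classify("with NOP", tramp_start + 4);		/* NOP pads entry */
	return 0;
}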
diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S
index d1414fe..c4b1990 100644
--- a/arch/arm64/kernel/vdso/vdso.S
+++ b/arch/arm64/kernel/vdso/vdso.S
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/const.h>
+#include <asm/assembler.h>
#include <asm/page.h>
.globl vdso_start, vdso_end
@@ -19,3 +20,5 @@
vdso_end:
.previous
+
+emit_aarch64_feature_1_and
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index 815df25..b840ab1 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -17,7 +17,10 @@
SECTIONS
{
- PROVIDE(_vdso_data = . - PAGE_SIZE);
+ PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+ PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
. = VDSO_LBASE + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
index 7476355..4236cf3 100644
--- a/arch/arm64/kernel/vdso/vgettimeofday.c
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -5,8 +5,6 @@
* Copyright (C) 2018 ARM Limited
*
*/
-#include <linux/time.h>
-#include <linux/types.h>
int __kernel_clock_gettime(clockid_t clock,
struct __kernel_timespec *ts)
diff --git a/arch/arm64/kernel/vdso32/.gitignore b/arch/arm64/kernel/vdso32/.gitignore
index 4fea950..3542fa2 100644
--- a/arch/arm64/kernel/vdso32/.gitignore
+++ b/arch/arm64/kernel/vdso32/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
vdso.lds
vdso.so.raw
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 40dffe6..abad38c 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -11,16 +11,21 @@
# Same as cc-*option, but using CC_COMPAT instead of CC
ifeq ($(CONFIG_CC_IS_CLANG), y)
CC_COMPAT ?= $(CC)
+CC_COMPAT += --target=arm-linux-gnueabi
else
CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc
endif
+ifeq ($(CONFIG_LD_IS_LLD), y)
+LD_COMPAT ?= $(LD)
+else
+LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld
+endif
+
cc32-option = $(call try-run,\
$(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
cc32-disable-warning = $(call try-run,\
$(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
-cc32-ldoption = $(call try-run,\
- $(CC_COMPAT) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
cc32-as-instr = $(call try-run,\
printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
@@ -32,16 +37,13 @@
# As a result we set our own flags here.
# KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
-VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
+VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc
+VDSO_CPPFLAGS += -isystem $(shell $(CC_COMPAT) -print-file-name=include 2>/dev/null)
VDSO_CPPFLAGS += $(LINUXINCLUDE)
# Common C and assembly flags
# From top-level Makefile
VDSO_CAFLAGS := $(VDSO_CPPFLAGS)
-ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),)
-VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%))
-endif
-
VDSO_CAFLAGS += $(call cc32-option,-fno-PIE)
ifdef CONFIG_DEBUG_INFO
VDSO_CAFLAGS += -g
@@ -79,9 +81,9 @@
# Some useful compiler-dependent flags from top-level Makefile
VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,)
VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign)
-VDSO_CFLAGS += $(call cc32-option,-fno-strict-overflow)
+VDSO_CFLAGS += -fno-strict-overflow
VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes)
-VDSO_CFLAGS += $(call cc32-option,-Werror=date-time)
+VDSO_CFLAGS += -Werror=date-time
VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types)
# The 32-bit compiler does not provide 128-bit integers, which are used in
@@ -94,6 +96,14 @@
VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow)
VDSO_CFLAGS += -Wno-int-to-pointer-cast
+# Compile as THUMB2 or ARM. Unwinding via frame-pointers in THUMB2 is
+# unreliable.
+ifeq ($(CONFIG_THUMB2_COMPAT_VDSO), y)
+VDSO_CFLAGS += -mthumb -fomit-frame-pointer
+else
+VDSO_CFLAGS += -marm
+endif
+
VDSO_AFLAGS := $(VDSO_CAFLAGS)
VDSO_AFLAGS += -D__ASSEMBLY__
@@ -103,25 +113,20 @@
VDSO_CFLAGS += $(dmbinstr)
VDSO_AFLAGS += $(dmbinstr)
-VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
# From arm vDSO Makefile
-VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
-VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
-VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft
-VDSO_LDFLAGS += -Wl,--hash-style=sysv
-VDSO_LDFLAGS += -Wl,--build-id
-VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd)
+VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1
+VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096
+VDSO_LDFLAGS += -nostdlib -shared --hash-style=sysv --build-id=sha1
# Borrow vdsomunge.c from the arm vDSO
# We have to use a relative path because scripts/Makefile.host prefixes
-# $(hostprogs-y) with $(obj)
+# $(hostprogs) with $(obj)
munge := ../../../arm/vdso/vdsomunge
-hostprogs-y := $(munge)
+hostprogs := $(munge)
c-obj-vdso := note.o
c-obj-vdso-gettimeofday := vgettimeofday.o
-asm-obj-vdso := sigreturn.o
ifneq ($(c-gettimeofday-y),)
VDSO_CFLAGS_gettimeofday_o += -include $(c-gettimeofday-y)
@@ -137,7 +142,7 @@
obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
obj-y += vdso.o
-extra-y += vdso.lds
+targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
# Force dependency (vdso.s includes vdso.so through incbin)
@@ -171,8 +176,8 @@
cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check)
quiet_cmd_vdsold = LD32 $@
- cmd_vdsold = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
- -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
+ cmd_vdsold = $(LD_COMPAT) $(VDSO_LDFLAGS) \
+ -T $(filter %.lds,$^) $(filter %.o,$^) -o $@
quiet_cmd_vdsocc = CC32 $@
cmd_vdsocc = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
quiet_cmd_vdsocc_gettimeofday = CC32 $@
diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S
deleted file mode 100644
index 1a81277..0000000
--- a/arch/arm64/kernel/vdso32/sigreturn.S
+++ /dev/null
@@ -1,62 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This file provides both A32 and T32 versions, in accordance with the
- * arm sigreturn code.
- *
- * Copyright (C) 2018 ARM Limited
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
-#define ARM_ENTRY(name) \
- ENTRY(name)
-
-#define ARM_ENDPROC(name) \
- .type name, %function; \
- END(name)
-
- .text
-
- .arm
- .fnstart
- .save {r0-r15}
- .pad #COMPAT_SIGFRAME_REGS_OFFSET
- nop
-ARM_ENTRY(__kernel_sigreturn_arm)
- mov r7, #__NR_compat_sigreturn
- svc #0
- .fnend
-ARM_ENDPROC(__kernel_sigreturn_arm)
-
- .fnstart
- .save {r0-r15}
- .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
- nop
-ARM_ENTRY(__kernel_rt_sigreturn_arm)
- mov r7, #__NR_compat_rt_sigreturn
- svc #0
- .fnend
-ARM_ENDPROC(__kernel_rt_sigreturn_arm)
-
- .thumb
- .fnstart
- .save {r0-r15}
- .pad #COMPAT_SIGFRAME_REGS_OFFSET
- nop
-ARM_ENTRY(__kernel_sigreturn_thumb)
- mov r7, #__NR_compat_sigreturn
- svc #0
- .fnend
-ARM_ENDPROC(__kernel_sigreturn_thumb)
-
- .fnstart
- .save {r0-r15}
- .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
- nop
-ARM_ENTRY(__kernel_rt_sigreturn_thumb)
- mov r7, #__NR_compat_rt_sigreturn
- svc #0
- .fnend
-ARM_ENDPROC(__kernel_rt_sigreturn_thumb)
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
index a394492..3348ce5 100644
--- a/arch/arm64/kernel/vdso32/vdso.lds.S
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -17,7 +17,10 @@
SECTIONS
{
- PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE);
+ PROVIDE_HIDDEN(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+ PROVIDE_HIDDEN(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
. = VDSO_LBASE + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
@@ -64,19 +67,7 @@
__vdso_clock_gettime;
__vdso_gettimeofday;
__vdso_clock_getres;
- __kernel_sigreturn_arm;
- __kernel_sigreturn_thumb;
- __kernel_rt_sigreturn_arm;
- __kernel_rt_sigreturn_thumb;
__vdso_clock_gettime64;
local: *;
};
}
-
-/*
- * Make the sigreturn code visible to the kernel.
- */
-VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm;
-VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb;
-VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm;
-VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb;
diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c
index 54fc1c2..5acff29 100644
--- a/arch/arm64/kernel/vdso32/vgettimeofday.c
+++ b/arch/arm64/kernel/vdso32/vgettimeofday.c
@@ -5,26 +5,16 @@
* Copyright (C) 2018 ARM Limited
*
*/
-#include <linux/time.h>
-#include <linux/types.h>
int __vdso_clock_gettime(clockid_t clock,
struct old_timespec32 *ts)
{
- /* The checks below are required for ABI consistency with arm */
- if ((u32)ts >= TASK_SIZE_32)
- return -EFAULT;
-
return __cvdso_clock_gettime32(clock, ts);
}
int __vdso_clock_gettime64(clockid_t clock,
struct __kernel_timespec *ts)
{
- /* The checks below are required for ABI consistency with arm */
- if ((u32)ts >= TASK_SIZE_32)
- return -EFAULT;
-
return __cvdso_clock_gettime(clock, ts);
}
@@ -37,10 +27,6 @@
int __vdso_clock_getres(clockid_t clock_id,
struct old_timespec32 *res)
{
- /* The checks below are required for ABI consistency with arm */
- if ((u32)res >= TASK_SIZE_32)
- return -EFAULT;
-
return __cvdso_clock_getres_time32(clock_id, res);
}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 1f82cf6..71f4b5f 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -5,32 +5,41 @@
* Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
*/
+#define RO_EXCEPTION_TABLE_ALIGN 8
+#define RUNTIME_DISCARD_EXIT
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
+#include <asm/hyp_image.h>
#include <asm/kernel-pgtable.h>
-#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include "image.h"
-/* .exit.text needed in case of alternative patching */
-#define ARM_EXIT_KEEP(x) x
-#define ARM_EXIT_DISCARD(x)
-
OUTPUT_ARCH(aarch64)
ENTRY(_text)
jiffies = jiffies_64;
+#ifdef CONFIG_KVM
#define HYPERVISOR_EXTABLE \
. = ALIGN(SZ_8); \
__start___kvm_ex_table = .; \
*(__kvm_ex_table) \
__stop___kvm_ex_table = .;
+#define HYPERVISOR_PERCPU_SECTION \
+ . = ALIGN(PAGE_SIZE); \
+ HYP_SECTION_NAME(.data..percpu) : { \
+ *(HYP_SECTION_NAME(.data..percpu)) \
+ }
+#else /* CONFIG_KVM */
+#define HYPERVISOR_EXTABLE
+#define HYPERVISOR_PERCPU_SECTION
+#endif
+
#define HYPERVISOR_TEXT \
/* \
* Align to 4 KB so that \
@@ -78,8 +87,8 @@
/*
* The size of the PE/COFF section that covers the kernel image, which
- * runs from stext to _edata, must be a round multiple of the PE/COFF
- * FileAlignment, which we set to its minimum value of 0x200. 'stext'
+ * runs from _stext to _edata, must be a round multiple of the PE/COFF
+ * FileAlignment, which we set to its minimum value of 0x200. '_stext'
* itself is 4 KB aligned, so padding out _edata to a 0x200 aligned
* boundary should be sufficient.
*/
@@ -100,18 +109,13 @@
* matching the same input section name. There is no documented
* order of matching.
*/
+ DISCARDS
/DISCARD/ : {
- ARM_EXIT_DISCARD(EXIT_TEXT)
- ARM_EXIT_DISCARD(EXIT_DATA)
- EXIT_CALL
- *(.discard)
- *(.discard.*)
*(.interp .dynamic)
*(.dynsym .dynstr .hash .gnu.hash)
- *(.eh_frame)
}
- . = KIMAGE_VADDR + TEXT_OFFSET;
+ . = KIMAGE_VADDR;
.head.text : {
_text = .;
@@ -119,9 +123,6 @@
}
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
- __exception_text_start = .;
- *(.exception.text)
- __exception_text_end = .;
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
ENTRY_TEXT
@@ -140,14 +141,20 @@
*(.got) /* Global offset table */
}
+ /*
+ * Make sure that the .got.plt is either completely empty or it
+ * contains only the lazy dispatch entries.
+ */
+ .got.plt : { *(.got.plt) }
+ ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18,
+ "Unexpected GOT/PLT entries detected!")
+
. = ALIGN(SEGMENT_ALIGN);
_etext = .; /* End of text section */
- RO_DATA(PAGE_SIZE) /* everything from this point to */
- EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */
- NOTES
+ /* everything from this point to __init_begin will be marked RO NX */
+ RO_DATA(PAGE_SIZE)
- . = ALIGN(PAGE_SIZE);
idmap_pg_dir = .;
. += IDMAP_DIR_SIZE;
idmap_pg_end = .;
@@ -171,7 +178,7 @@
__exittext_begin = .;
.exit.text : {
- ARM_EXIT_KEEP(EXIT_TEXT)
+ EXIT_TEXT
}
__exittext_end = .;
@@ -182,7 +189,7 @@
__alt_instructions_end = .;
}
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(SEGMENT_ALIGN);
__inittext_end = .;
__initdata_begin = .;
@@ -195,10 +202,11 @@
*(.init.rodata.* .init.bss) /* from the EFI stub */
}
.exit.data : {
- ARM_EXIT_KEEP(EXIT_DATA)
+ EXIT_DATA
}
PERCPU_SECTION(L1_CACHE_BYTES)
+ HYPERVISOR_PERCPU_SECTION
.rela.dyn : ALIGN(8) {
*(.rela .rela*)
@@ -222,7 +230,7 @@
_data = .;
_sdata = .;
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
/*
* Data written with the MMU off but read with the MMU on requires
@@ -253,12 +261,27 @@
. += INIT_DIR_SIZE;
init_pg_end = .;
+ . = ALIGN(SEGMENT_ALIGN);
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
_end = .;
STABS_DEBUG
+ DWARF_DEBUG
+ ELF_DETAILS
HEAD_SYMBOLS
+
+ /*
+ * Sections that should stay zero sized, which is safer to
+ * explicitly check instead of blindly discarding.
+ */
+ .plt : {
+ *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+
+ .data.rel.ro : { *(.data.rel.ro) }
+ ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!")
}
#include "image-vars.h"
@@ -276,10 +299,10 @@
<= SZ_4K, "Hibernate exit text too big or misaligned")
#endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
+ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE,
"Entry trampoline text too big")
#endif
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.
*/
-ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned")
+ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned")