Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore
index c5f676c..67ebd30 100644
--- a/arch/powerpc/kernel/.gitignore
+++ b/arch/powerpc/kernel/.gitignore
@@ -1 +1,2 @@
+prom_init_check
vmlinux.lds
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 3b66f2c..a7ca8fe 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -5,7 +5,8 @@
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+# Disable clang warning for using setjmp without setjmp.h header
+CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header)
ifdef CONFIG_PPC64
CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
@@ -20,12 +21,26 @@
CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
+
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
-CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_cputable.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE)
+endif
+
+KASAN_SANITIZE_early_32.o := n
+KASAN_SANITIZE_cputable.o := n
+KASAN_SANITIZE_prom_init.o := n
+KASAN_SANITIZE_btext.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
endif
obj-y := cputable.o ptrace.o syscalls.o \
@@ -33,14 +48,15 @@
process.o systbl.o idle.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
- udbg.o misc.o io.o dma.o misc_$(BITS).o \
+ udbg.o misc.o io.o misc_$(BITS).o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
- paca.o nvram_64.o firmware.o
+ paca.o nvram_64.o firmware.o note.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_PPC_DAWR) += dawr.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
@@ -62,11 +78,13 @@
eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
-obj-$(CONFIG_FA_DUMP) += fadump.o
+ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),)
+obj-y += fadump.o
+endif
ifdef CONFIG_PPC32
obj-$(CONFIG_E500) += idle_e500.o
endif
-obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
+obj-$(CONFIG_PPC_BOOK3S_32) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
obj-$(CONFIG_TAU) += tau_6xx.o
obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o
ifdef CONFIG_FSL_BOOKE
@@ -90,7 +108,7 @@
obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
-obj-$(CONFIG_PPC32) += entry_32.o setup_32.o
+obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
@@ -102,6 +120,7 @@
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
+obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o
pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \
@@ -138,44 +157,45 @@
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
+ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),)
+obj-y += ucall.o
+endif
-# Disable GCOV & sanitizers in odd or sensitive code
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
+KCOV_INSTRUMENT_prom_init.o := n
UBSAN_SANITIZE_prom_init.o := n
GCOV_PROFILE_machine_kexec_64.o := n
+KCOV_INSTRUMENT_machine_kexec_64.o := n
UBSAN_SANITIZE_machine_kexec_64.o := n
GCOV_PROFILE_machine_kexec_32.o := n
+KCOV_INSTRUMENT_machine_kexec_32.o := n
UBSAN_SANITIZE_machine_kexec_32.o := n
GCOV_PROFILE_kprobes.o := n
+KCOV_INSTRUMENT_kprobes.o := n
UBSAN_SANITIZE_kprobes.o := n
GCOV_PROFILE_kprobes-ftrace.o := n
+KCOV_INSTRUMENT_kprobes-ftrace.o := n
UBSAN_SANITIZE_kprobes-ftrace.o := n
UBSAN_SANITIZE_vdso.o := n
+# Necessary for booting with kcov enabled on book3e machines
+KCOV_INSTRUMENT_cputable.o := n
+KCOV_INSTRUMENT_setup_64.o := n
+KCOV_INSTRUMENT_paca.o := n
+
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_ALTIVEC) += vector.o
extra-$(CONFIG_PPC64) += entry_64.o
extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
-extra-y += systbl_chk.i
-$(obj)/systbl.o: systbl_chk
+extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check
-quiet_cmd_systbl_chk = CALL $<
- cmd_systbl_chk = $(CONFIG_SHELL) $< $(obj)/systbl_chk.i
+quiet_cmd_prom_init_check = PROMCHK $@
+ cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@
-PHONY += systbl_chk
-systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i
- $(call cmd,systbl_chk)
-
-ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
-$(obj)/built-in.a: prom_init_check
-
-quiet_cmd_prom_init_check = CALL $<
- cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
-
-PHONY += prom_init_check
-prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
- $(call cmd,prom_init_check)
-endif
+$(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE
+ $(call if_changed,prom_init_check)
+targets += prom_init_check
clean-files := vmlinux.lds
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 11550a3..92045ed 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* align.c - handle alignment exceptions for the Power PC.
*
* Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
@@ -10,11 +11,6 @@
* Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp
* <benh@kernel.crashing.org>
* Merge ppc32 and ppc64 implementations
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
@@ -131,8 +127,7 @@
/* Verify the address of the operand */
if (unlikely(user_mode(regs) &&
- !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
- addr, nb)))
+ !access_ok(addr, nb)))
return -EFAULT;
/* userland only */
@@ -181,9 +176,11 @@
ret |= __get_user_inatomic(temp.v[1], p++);
ret |= __get_user_inatomic(temp.v[2], p++);
ret |= __get_user_inatomic(temp.v[3], p++);
+ /* fall through */
case 4:
ret |= __get_user_inatomic(temp.v[4], p++);
ret |= __get_user_inatomic(temp.v[5], p++);
+ /* fall through */
case 2:
ret |= __get_user_inatomic(temp.v[6], p++);
ret |= __get_user_inatomic(temp.v[7], p++);
@@ -264,9 +261,11 @@
ret |= __put_user_inatomic(data.v[1], p++);
ret |= __put_user_inatomic(data.v[2], p++);
ret |= __put_user_inatomic(data.v[3], p++);
+ /* fall through */
case 4:
ret |= __put_user_inatomic(data.v[4], p++);
ret |= __put_user_inatomic(data.v[5], p++);
+ /* fall through */
case 2:
ret |= __put_user_inatomic(data.v[6], p++);
ret |= __put_user_inatomic(data.v[7], p++);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 89cf155..484f54d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* This program is used to generate definitions needed by
* assembly language modules.
@@ -6,13 +7,10 @@
* generate asm statements containing #defines,
* compile this file to assembler, and then extract the
* #defines from the assembly-language output.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#define GENERATING_ASM_OFFSETS /* asm/smp.h */
+
#include <linux/compat.h>
#include <linux/signal.h>
#include <linux/sched.h>
@@ -79,16 +77,26 @@
{
OFFSET(THREAD, task_struct, thread);
OFFSET(MM, task_struct, mm);
+#ifdef CONFIG_STACKPROTECTOR
+ OFFSET(TASK_CANARY, task_struct, stack_canary);
+#ifdef CONFIG_PPC64
+ OFFSET(PACA_CANARY, paca_struct, canary);
+#endif
+#endif
OFFSET(MMCONTEXTID, mm_struct, context.id);
#ifdef CONFIG_PPC64
DEFINE(SIGSEGV, SIGSEGV);
DEFINE(NMI_MASK, NMI_MASK);
- OFFSET(TASKTHREADPPR, task_struct, thread.ppr);
#else
- OFFSET(THREAD_INFO, task_struct, stack);
- DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
OFFSET(KSP_LIMIT, thread_struct, ksp_limit);
+#ifdef CONFIG_PPC_RTAS
+ OFFSET(RTAS_SP, thread_struct, rtas_sp);
+#endif
#endif /* CONFIG_PPC64 */
+ OFFSET(TASK_STACK, task_struct, stack);
+#ifdef CONFIG_SMP
+ OFFSET(TASK_CPU, task_struct, cpu);
+#endif
#ifdef CONFIG_LIVEPATCH
OFFSET(TI_livepatch_sp, thread_info, livepatch_sp);
@@ -135,6 +143,9 @@
#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu);
#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+ OFFSET(KUAP, thread_struct, kuap);
+#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
OFFSET(PACATMSCRATCH, paca_struct, tm_scratch);
@@ -156,8 +167,6 @@
OFFSET(TI_FLAGS, thread_info, flags);
OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
OFFSET(TI_PREEMPT, thread_info, preempt_count);
- OFFSET(TI_TASK, thread_info, task);
- OFFSET(TI_CPU, thread_info, cpu);
#ifdef CONFIG_PPC64
OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size);
@@ -172,8 +181,9 @@
OFFSET(PACAPROCSTART, paca_struct, cpu_start);
OFFSET(PACAKSAVE, paca_struct, kstack);
OFFSET(PACACURRENT, paca_struct, __current);
+ DEFINE(PACA_THREAD_INFO, offsetof(struct paca_struct, __current) +
+ offsetof(struct task_struct, thread_info));
OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
- OFFSET(PACASTABRR, paca_struct, stab_rr);
OFFSET(PACAR1, paca_struct, saved_r1);
OFFSET(PACATOC, paca_struct, kernel_toc);
OFFSET(PACAKBASE, paca_struct, kernelbase);
@@ -212,6 +222,7 @@
#ifdef CONFIG_PPC_BOOK3S_64
OFFSET(PACASLBCACHE, paca_struct, slb_cache);
OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
+ OFFSET(PACASTABRR, paca_struct, stab_rr);
OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
#ifdef CONFIG_PPC_MM_SLICES
OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp);
@@ -255,8 +266,9 @@
OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user);
OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
+#ifdef CONFIG_PPC_BOOK3E
OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
- OFFSET(PACA_NAPSTATELOST, paca_struct, nap_state_lost);
+#endif
OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
#else /* CONFIG_PPC64 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
@@ -274,11 +286,6 @@
/* Interrupt register frame */
DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
-#ifdef CONFIG_PPC64
- /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
- DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
- DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
-#endif /* CONFIG_PPC64 */
STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
@@ -322,12 +329,13 @@
STACK_PT_REGS_OFFSET(_ESR, dsisr);
#else /* CONFIG_PPC64 */
STACK_PT_REGS_OFFSET(SOFTE, softe);
-
- /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
- DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
- DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
+ STACK_PT_REGS_OFFSET(_PPR, ppr);
#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC_KUAP
+ STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap);
+#endif
+
#if defined(CONFIG_PPC32)
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
@@ -387,12 +395,12 @@
OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
OFFSET(TVAL64_TV_SEC, timeval, tv_sec);
OFFSET(TVAL64_TV_USEC, timeval, tv_usec);
- OFFSET(TVAL32_TV_SEC, compat_timeval, tv_sec);
- OFFSET(TVAL32_TV_USEC, compat_timeval, tv_usec);
+ OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
+ OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
OFFSET(TSPC64_TV_SEC, timespec, tv_sec);
OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec);
- OFFSET(TSPC32_TV_SEC, compat_timespec, tv_sec);
- OFFSET(TSPC32_TV_NSEC, compat_timespec, tv_nsec);
+ OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
+ OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
#else
OFFSET(TVAL32_TV_SEC, timeval, tv_sec);
OFFSET(TVAL32_TV_USEC, timeval, tv_usec);
@@ -438,7 +446,7 @@
#ifdef CONFIG_PPC_BOOK3S
OFFSET(VCPU_TAR, kvm_vcpu, arch.tar);
#endif
- OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
+ OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr);
@@ -498,11 +506,13 @@
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
+ OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
+ OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested);
OFFSET(VCPU_CPU, kvm_vcpu, cpu);
OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
#endif
@@ -695,7 +705,7 @@
#endif /* CONFIG_PPC_BOOK3S_64 */
#else /* CONFIG_PPC_BOOK3S */
- OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
+ OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
@@ -761,23 +771,6 @@
OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl);
#endif
-#ifdef CONFIG_PPC_POWERNV
- OFFSET(PACA_CORE_IDLE_STATE_PTR, paca_struct, core_idle_state_ptr);
- OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state);
- OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
- OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
- OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
- OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
-#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
- STOP_SPR(STOP_PID, pid);
- STOP_SPR(STOP_LDBAR, ldbar);
- STOP_SPR(STOP_FSCR, fscr);
- STOP_SPR(STOP_HFSCR, hfscr);
- STOP_SPR(STOP_MMCR1, mmcr1);
- STOP_SPR(STOP_MMCR2, mmcr2);
- STOP_SPR(STOP_MMCRA, mmcra);
-#endif
-
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index b2072d5..6dfceaa 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -163,7 +163,7 @@
offset = ((unsigned long) dispDeviceBase) - base;
size = dispDeviceRowBytes * dispDeviceRect[3] + offset
+ dispDeviceRect[0];
- vbase = __ioremap(base, size, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
+ vbase = ioremap_wc(base, size);
if (!vbase)
return;
logicalDisplayBase = vbase + offset;
@@ -232,20 +232,12 @@
int __init btext_find_display(int allow_nonstdout)
{
- const char *name;
- struct device_node *np = NULL;
+ struct device_node *np = of_stdout;
int rc = -ENODEV;
- name = of_get_property(of_chosen, "linux,stdout-path", NULL);
- if (name != NULL) {
- np = of_find_node_by_path(name);
- if (np != NULL) {
- if (strcmp(np->type, "display") != 0) {
- printk("boot stdout isn't a display !\n");
- of_node_put(np);
- np = NULL;
- }
- }
+ if (!of_node_is_type(np, "display")) {
+ printk("boot stdout isn't a display !\n");
+ np = NULL;
}
if (np)
rc = btext_initialize(np);
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index a8f20e5..4703362 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Processor cache information made available to userspace via sysfs;
* intended to be compatible with x86 intel_cacheinfo implementation.
*
* Copyright 2008 IBM Corporation
* Author: Nathan Lynch
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
*/
#include <linux/cpu.h>
@@ -20,6 +17,8 @@
#include <linux/percpu.h>
#include <linux/slab.h>
#include <asm/prom.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
#include "cacheinfo.h"
@@ -351,8 +350,6 @@
CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED;
}
-/*
- */
static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
{
pr_debug("creating L%d ucache for %pOF\n", level, node);
@@ -426,7 +423,7 @@
static void do_subsidiary_caches_debugcheck(struct cache *cache)
{
WARN_ON_ONCE(cache->level != 1);
- WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu"));
+ WARN_ON_ONCE(!of_node_is_type(cache->ofnode, "cpu"));
}
static void do_subsidiary_caches(struct cache *cache)
@@ -627,17 +624,48 @@
static struct kobj_attribute cache_level_attr =
__ATTR(level, 0444, level_show, NULL);
+static unsigned int index_dir_to_cpu(struct cache_index_dir *index)
+{
+ struct kobject *index_dir_kobj = &index->kobj;
+ struct kobject *cache_dir_kobj = index_dir_kobj->parent;
+ struct kobject *cpu_dev_kobj = cache_dir_kobj->parent;
+ struct device *dev = kobj_to_dev(cpu_dev_kobj);
+
+ return dev->id;
+}
+
+/*
+ * On big-core systems, each core has two groups of CPUs each of which
+ * has its own L1-cache. The thread-siblings which share l1-cache with
+ * @cpu can be obtained via cpu_smallcore_mask().
+ */
+static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *cache)
+{
+ if (cache->level == 1)
+ return cpu_smallcore_mask(cpu);
+
+ return &cache->shared_cpu_map;
+}
+
static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
{
struct cache_index_dir *index;
struct cache *cache;
- int ret;
+ const struct cpumask *mask;
+ int ret, cpu;
index = kobj_to_cache_index_dir(k);
cache = index->cache;
+ if (has_big_cores) {
+ cpu = index_dir_to_cpu(index);
+ mask = get_big_core_shared_cpu_map(cpu, cache);
+ } else {
+ mask = &cache->shared_cpu_map;
+ }
+
ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n",
- cpumask_pr_args(&cache->shared_cpu_map));
+ cpumask_pr_args(mask));
buf[ret++] = '\n';
buf[ret] = '\0';
return ret;
@@ -726,23 +754,21 @@
index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
if (!index_dir)
- goto err;
+ return;
index_dir->cache = cache;
rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
cache_dir->kobj, "index%d", index);
- if (rc)
- goto err;
+ if (rc) {
+ kobject_put(&index_dir->kobj);
+ return;
+ }
index_dir->next = cache_dir->index;
cache_dir->index = index_dir;
cacheinfo_create_index_opt_attrs(index_dir);
-
- return;
-err:
- kfree(index_dir);
}
static void cacheinfo_sysfs_populate(unsigned int cpu_id,
@@ -865,4 +891,25 @@
if (cache)
cache_cpu_clear(cache, cpu_id);
}
+
+void cacheinfo_teardown(void)
+{
+ unsigned int cpu;
+
+ lockdep_assert_cpus_held();
+
+ for_each_online_cpu(cpu)
+ cacheinfo_cpu_offline(cpu);
+}
+
+void cacheinfo_rebuild(void)
+{
+ unsigned int cpu;
+
+ lockdep_assert_cpus_held();
+
+ for_each_online_cpu(cpu)
+ cacheinfo_cpu_online(cpu);
+}
+
#endif /* (CONFIG_PPC_PSERIES && CONFIG_SUSPEND) || CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h
index 955f5e9..52bd3fc 100644
--- a/arch/powerpc/kernel/cacheinfo.h
+++ b/arch/powerpc/kernel/cacheinfo.h
@@ -6,4 +6,8 @@
extern void cacheinfo_cpu_online(unsigned int cpu_id);
extern void cacheinfo_cpu_offline(unsigned int cpu_id);
+/* Allow migration/suspend to tear down and rebuild the hierarchy. */
+extern void cacheinfo_teardown(void);
+extern void cacheinfo_rebuild(void);
+
#endif /* _PPC_CACHEINFO_H */
diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S
index e32b4a9..e1d705e 100644
--- a/arch/powerpc/kernel/cpu_setup_44x.S
+++ b/arch/powerpc/kernel/cpu_setup_44x.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains low level CPU setup functions.
* Valentine Barshak <vbarshak@ru.mvista.com>
@@ -5,12 +6,6 @@
*
* Based on cpu_setup_6xx code by
* Benjamin Herrenschmidt <benh@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <asm/processor.h>
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index fa3c2c9..f6517f6 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -1,12 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains low level CPU setup functions.
* Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <asm/processor.h>
@@ -24,6 +19,7 @@
li r10,0
mtspr SPRN_SPRG_603_LRU,r10 /* init SW LRU tracking */
END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+
BEGIN_FTR_SECTION
bl __init_fpu_registers
END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
@@ -326,7 +322,7 @@
lis r5,cpu_state_storage@h
ori r5,r5,cpu_state_storage@l
- /* Save HID0 (common to all CONFIG_6xx cpus) */
+ /* Save HID0 (common to all CONFIG_PPC_BOOK3S_32 cpus) */
mfspr r3,SPRN_HID0
stw r3,CS_HID0(r5)
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 8d142e5..2b4f3ec 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains low level CPU setup functions.
* Kumar Gala <galak@kernel.crashing.org>
@@ -5,19 +6,13 @@
*
* Based on cpu_setup_6xx code by
* Benjamin Herrenschmidt <benh@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <asm/page.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/ppc_asm.h>
-#include <asm/mmu-book3e.h>
+#include <asm/nohash/mmu-book3e.h>
#include <asm/asm-offsets.h>
#include <asm/mpc85xx.h>
diff --git a/arch/powerpc/kernel/cpu_setup_pa6t.S b/arch/powerpc/kernel/cpu_setup_pa6t.S
index d62cb9c..e6bfd44 100644
--- a/arch/powerpc/kernel/cpu_setup_pa6t.S
+++ b/arch/powerpc/kernel/cpu_setup_pa6t.S
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2006-2007 PA Semi, Inc
*
* Maintained by: Olof Johansson <olof@lixom.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
*/
#include <asm/processor.h>
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 458b928..a460298 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -1,12 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains low level CPU setup functions.
* Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <asm/processor.h>
@@ -28,6 +23,7 @@
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -42,6 +38,7 @@
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -59,6 +56,7 @@
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
@@ -81,6 +79,7 @@
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
@@ -103,6 +102,7 @@
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
@@ -128,6 +128,7 @@
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
@@ -147,8 +148,8 @@
rldicl. r0,r3,4,63
bnelr
ld r5,CPU_SPEC_FEATURES(r4)
- LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE)
- xor r5,r5,r6
+ LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST)
+ andc r5,r5,r6
std r5,CPU_SPEC_FEATURES(r4)
blr
diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S
index 12fac8d..f0c07e7 100644
--- a/arch/powerpc/kernel/cpu_setup_ppc970.S
+++ b/arch/powerpc/kernel/cpu_setup_ppc970.S
@@ -1,12 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains low level CPU setup functions.
* Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <asm/processor.h>
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 2da0134..e745abc 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
*
* Modifications for ppc64:
* Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/string.h>
@@ -573,7 +569,7 @@
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC32
-#ifdef CONFIG_PPC_BOOK3S_32
+#ifdef CONFIG_PPC_BOOK3S_601
{ /* 601 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00010000,
@@ -587,6 +583,8 @@
.machine_check = machine_check_generic,
.platform = "ppc601",
},
+#endif /* CONFIG_PPC_BOOK3S_601 */
+#ifdef CONFIG_PPC_BOOK3S_6xx
{ /* 603 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00030000,
@@ -1141,6 +1139,7 @@
.machine_check = machine_check_generic,
.platform = "ppc603",
},
+#ifdef CONFIG_PPC_83xx
{ /* e300c1 (a 603e core, plus some) on 83xx */
.pvr_mask = 0x7fff0000,
.pvr_value = 0x00830000,
@@ -1151,7 +1150,7 @@
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
+ .machine_check = machine_check_83xx,
.platform = "ppc603",
},
{ /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */
@@ -1165,7 +1164,7 @@
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
+ .machine_check = machine_check_83xx,
.platform = "ppc603",
},
{ /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */
@@ -1179,7 +1178,7 @@
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
+ .machine_check = machine_check_83xx,
.num_pmcs = 4,
.oprofile_cpu_type = "ppc/e300",
.oprofile_type = PPC_OPROFILE_FSL_EMB,
@@ -1196,12 +1195,13 @@
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
+ .machine_check = machine_check_83xx,
.num_pmcs = 4,
.oprofile_cpu_type = "ppc/e300",
.oprofile_type = PPC_OPROFILE_FSL_EMB,
.platform = "ppc603",
},
+#endif
{ /* default match, we assume split I/D cache & TB (non-601)... */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
@@ -1214,7 +1214,7 @@
.machine_check = machine_check_generic,
.platform = "ppc603",
},
-#endif /* CONFIG_PPC_BOOK3S_32 */
+#endif /* CONFIG_PPC_BOOK3S_6xx */
#ifdef CONFIG_PPC_8xx
{ /* 8xx */
.pvr_mask = 0xffff0000,
@@ -2145,7 +2145,11 @@
struct cpu_spec *t = &the_cpu_spec;
t = PTRRELOC(t);
- *t = *s;
+ /*
+ * use memcpy() instead of *t = *s so that GCC replaces it
+ * by __memcpy() when KASAN is active
+ */
+ memcpy(t, s, sizeof(*t));
*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
}
@@ -2159,8 +2163,11 @@
t = PTRRELOC(t);
old = *t;
- /* Copy everything, then do fixups */
- *t = *s;
+ /*
+ * Copy everything, then do fixups. Use memcpy() instead of *t = *s
+ * so that GCC replaces it by __memcpy() when KASAN is active
+ */
+ memcpy(t, s, sizeof(*t));
/*
* If we are overriding a previous value derived from the real
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 43a3ce2..d488311 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Architecture specific (PPC64) functions for kexec based crash dumps.
*
* Copyright (C) 2005, IBM Corp.
*
* Created by: Haren Myneni
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
- *
*/
#include <linux/kernel.h>
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index d10ad25..05745dd 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Routines for doing kexec-based kdump.
*
* Copyright (C) 2005, IBM Corp.
*
* Created by: Michael Ellerman
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#undef DEBUG
@@ -110,7 +108,7 @@
vaddr = __va(paddr);
csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
} else {
- vaddr = __ioremap(paddr, PAGE_SIZE, 0);
+ vaddr = ioremap_cache(paddr, PAGE_SIZE);
csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
iounmap(vaddr);
}
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
new file mode 100644
index 0000000..5f66b95
--- /dev/null
+++ b/arch/powerpc/kernel/dawr.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * DAWR infrastructure
+ *
+ * Copyright 2019, Michael Neuling, IBM Corporation.
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <asm/debugfs.h>
+#include <asm/machdep.h>
+#include <asm/hvcall.h>
+
+bool dawr_force_enable;
+EXPORT_SYMBOL_GPL(dawr_force_enable);
+
+int set_dawr(struct arch_hw_breakpoint *brk)
+{
+ unsigned long dawr, dawrx, mrd;
+
+ dawr = brk->address;
+
+ dawrx = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE))
+ << (63 - 58);
+ dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) << (63 - 59);
+ dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) >> 3;
+ /*
+ * DAWR length is stored in field MDR bits 48:53. Matches range in
+ * doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
+ * 0b111111=64DW.
+ * brk->len is in bytes.
+ * This aligns up to double word size, shifts and does the bias.
+ */
+ mrd = ((brk->len + 7) >> 3) - 1;
+ dawrx |= (mrd & 0x3f) << (63 - 53);
+
+ if (ppc_md.set_dawr)
+ return ppc_md.set_dawr(dawr, dawrx);
+
+ mtspr(SPRN_DAWR, dawr);
+ mtspr(SPRN_DAWRX, dawrx);
+
+ return 0;
+}
+
+static void set_dawr_cb(void *info)
+{
+ set_dawr(info);
+}
+
+static ssize_t dawr_write_file_bool(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct arch_hw_breakpoint null_brk = {0, 0, 0};
+ size_t rc;
+
+ /* Send error to user if they hypervisor won't allow us to write DAWR */
+ if (!dawr_force_enable &&
+ firmware_has_feature(FW_FEATURE_LPAR) &&
+ set_dawr(&null_brk) != H_SUCCESS)
+ return -ENODEV;
+
+ rc = debugfs_write_file_bool(file, user_buf, count, ppos);
+ if (rc)
+ return rc;
+
+ /* If we are clearing, make sure all CPUs have the DAWR cleared */
+ if (!dawr_force_enable)
+ smp_call_function(set_dawr_cb, &null_brk, 0);
+
+ return rc;
+}
+
+static const struct file_operations dawr_enable_fops = {
+ .read = debugfs_read_file_bool,
+ .write = dawr_write_file_bool,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+static int __init dawr_force_setup(void)
+{
+ if (cpu_has_feature(CPU_FTR_DAWR)) {
+ /* Don't setup sysfs file for user control on P8 */
+ dawr_force_enable = true;
+ return 0;
+ }
+
+ if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
+ /* Turn DAWR off by default, but allow admin to turn it on */
+ debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
+ powerpc_debugfs_root,
+ &dawr_force_enable,
+ &dawr_enable_fops);
+ }
+ return 0;
+}
+arch_initcall(dawr_force_setup);
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index b6fe883..f17ff12 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Author: Kumar Gala <galak@kernel.crashing.org>
*
* Copyright 2009 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -18,6 +14,7 @@
#include <asm/dbell.h>
#include <asm/irq_regs.h>
#include <asm/kvm_ppc.h>
+#include <asm/trace.h>
#ifdef CONFIG_SMP
@@ -36,7 +33,7 @@
{
u32 tag = get_hard_smp_processor_id(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -51,7 +48,7 @@
{
u32 tag = cpu_thread_in_core(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -81,16 +78,18 @@
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
+ trace_doorbell_entry(regs);
ppc_msgsync();
may_hard_irq_enable();
- kvmppc_set_host_ipi(smp_processor_id(), 0);
+ kvmppc_clear_host_ipi(smp_processor_id());
__this_cpu_inc(irq_stat.doorbell_irqs);
smp_ipi_demux_relaxed(); /* already performed the barrier */
+ trace_doorbell_exit(regs);
irq_exit();
set_irq_regs(old_regs);
}
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index f9fe208..e486d1d 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -6,13 +6,31 @@
* busses using the iommu infrastructure
*/
-#include <linux/export.h>
+#include <linux/dma-direct.h>
+#include <linux/pci.h>
#include <asm/iommu.h>
/*
* Generic iommu implementation
*/
+/*
+ * The coherent mask may be smaller than the real mask, check if we can
+ * really use a direct window.
+ */
+static inline bool dma_iommu_alloc_bypass(struct device *dev)
+{
+ return dev->archdata.iommu_bypass && !iommu_fixed_is_weak &&
+ dma_direct_supported(dev, dev->coherent_dma_mask);
+}
+
+static inline bool dma_iommu_map_bypass(struct device *dev,
+ unsigned long attrs)
+{
+ return dev->archdata.iommu_bypass &&
+ (!iommu_fixed_is_weak || (attrs & DMA_ATTR_WEAK_ORDERING));
+}
+
/* Allocates a contiguous real buffer and creates mappings over it.
* Returns the virtual address of the buffer and sets dma_handle
* to the dma address (mapping) of the first page.
@@ -21,6 +39,8 @@
dma_addr_t *dma_handle, gfp_t flag,
unsigned long attrs)
{
+ if (dma_iommu_alloc_bypass(dev))
+ return dma_direct_alloc(dev, size, dma_handle, flag, attrs);
return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
dma_handle, dev->coherent_dma_mask, flag,
dev_to_node(dev));
@@ -30,7 +50,11 @@
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
{
- iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
+ if (dma_iommu_alloc_bypass(dev))
+ dma_direct_free(dev, size, vaddr, dma_handle, attrs);
+ else
+ iommu_free_coherent(get_iommu_table_base(dev), size, vaddr,
+ dma_handle);
}
/* Creates TCEs for a user provided buffer. The user buffer must be
@@ -43,8 +67,11 @@
enum dma_data_direction direction,
unsigned long attrs)
{
+ if (dma_iommu_map_bypass(dev, attrs))
+ return dma_direct_map_page(dev, page, offset, size, direction,
+ attrs);
return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
- size, device_to_mask(dev), direction, attrs);
+ size, dma_get_mask(dev), direction, attrs);
}
@@ -52,8 +79,11 @@
size_t size, enum dma_data_direction direction,
unsigned long attrs)
{
- iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
- attrs);
+ if (!dma_iommu_map_bypass(dev, attrs))
+ iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size,
+ direction, attrs);
+ else
+ dma_direct_unmap_page(dev, dma_handle, size, direction, attrs);
}
@@ -61,16 +91,30 @@
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
+ if (dma_iommu_map_bypass(dev, attrs))
+ return dma_direct_map_sg(dev, sglist, nelems, direction, attrs);
return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
- device_to_mask(dev), direction, attrs);
+ dma_get_mask(dev), direction, attrs);
}
static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
- ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
+ if (!dma_iommu_map_bypass(dev, attrs))
+ ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
direction, attrs);
+ else
+ dma_direct_unmap_sg(dev, sglist, nelems, direction, attrs);
+}
+
+static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pci_controller *phb = pci_bus_to_host(pdev->bus);
+
+ return phb->controller_ops.iommu_bypass_supported &&
+ phb->controller_ops.iommu_bypass_supported(pdev, mask);
}
/* We support DMA to/from any memory page via the iommu */
@@ -78,9 +122,14 @@
{
struct iommu_table *tbl = get_iommu_table_base(dev);
+ if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
+ dev->archdata.iommu_bypass = true;
+ dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+ return 1;
+ }
+
if (!tbl) {
- dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
- ", table unavailable\n", mask);
+ dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
return 0;
}
@@ -89,38 +138,75 @@
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
mask, tbl->it_offset << tbl->it_page_shift);
return 0;
- } else
- return 1;
+ }
+
+ dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
+ dev->archdata.iommu_bypass = false;
+ return 1;
}
-static u64 dma_iommu_get_required_mask(struct device *dev)
+u64 dma_iommu_get_required_mask(struct device *dev)
{
struct iommu_table *tbl = get_iommu_table_base(dev);
u64 mask;
+
if (!tbl)
return 0;
+ if (dev_is_pci(dev)) {
+ u64 bypass_mask = dma_direct_get_required_mask(dev);
+
+ if (dma_iommu_bypass_supported(dev, bypass_mask))
+ return bypass_mask;
+ }
+
mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1);
mask += mask - 1;
return mask;
}
-int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
+static void dma_iommu_sync_for_cpu(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir)
{
- return dma_addr == IOMMU_MAPPING_ERROR;
+ if (dma_iommu_alloc_bypass(dev))
+ dma_direct_sync_single_for_cpu(dev, addr, size, dir);
}
-struct dma_map_ops dma_iommu_ops = {
+static void dma_iommu_sync_for_device(struct device *dev, dma_addr_t addr,
+ size_t sz, enum dma_data_direction dir)
+{
+ if (dma_iommu_alloc_bypass(dev))
+ dma_direct_sync_single_for_device(dev, addr, sz, dir);
+}
+
+extern void dma_iommu_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+ if (dma_iommu_alloc_bypass(dev))
+ dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir);
+}
+
+extern void dma_iommu_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+ if (dma_iommu_alloc_bypass(dev))
+ dma_direct_sync_sg_for_device(dev, sgl, nents, dir);
+}
+
+const struct dma_map_ops dma_iommu_ops = {
.alloc = dma_iommu_alloc_coherent,
.free = dma_iommu_free_coherent,
- .mmap = dma_nommu_mmap_coherent,
.map_sg = dma_iommu_map_sg,
.unmap_sg = dma_iommu_unmap_sg,
.dma_supported = dma_iommu_dma_supported,
.map_page = dma_iommu_map_page,
.unmap_page = dma_iommu_unmap_page,
.get_required_mask = dma_iommu_get_required_mask,
- .mapping_error = dma_iommu_mapping_error,
+ .sync_single_for_cpu = dma_iommu_sync_for_cpu,
+ .sync_single_for_device = dma_iommu_sync_for_device,
+ .sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu,
+ .sync_sg_for_device = dma_iommu_sync_sg_for_device,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
-EXPORT_SYMBOL(dma_iommu_ops);
diff --git a/arch/powerpc/kernel/dma-mask.c b/arch/powerpc/kernel/dma-mask.c
new file mode 100644
index 0000000..ffbbbc4
--- /dev/null
+++ b/arch/powerpc/kernel/dma-mask.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+#include <asm/machdep.h>
+
+void arch_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+ if (ppc_md.dma_set_mask)
+ ppc_md.dma_set_mask(dev, dma_mask);
+}
+EXPORT_SYMBOL(arch_dma_set_mask);
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 88f3963..fc78161 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -1,119 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Contains routines needed to support swiotlb for ppc.
*
* Copyright (C) 2009-2010 Freescale Semiconductor, Inc.
* Author: Becky Bruce
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
*/
-
-#include <linux/dma-mapping.h>
#include <linux/memblock.h>
-#include <linux/pfn.h>
-#include <linux/of_platform.h>
-#include <linux/platform_device.h>
-#include <linux/pci.h>
-
#include <asm/machdep.h>
#include <asm/swiotlb.h>
-#include <asm/dma.h>
unsigned int ppc_swiotlb_enable;
-static u64 swiotlb_powerpc_get_required(struct device *dev)
-{
- u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr;
-
- end = memblock_end_of_DRAM();
- if (max_direct_dma_addr && end > max_direct_dma_addr)
- end = max_direct_dma_addr;
- end += get_dma_offset(dev);
-
- mask = 1ULL << (fls64(end) - 1);
- mask += mask - 1;
-
- return mask;
-}
-
-/*
- * At the moment, all platforms that use this code only require
- * swiotlb to be used if we're operating on HIGHMEM. Since
- * we don't ever call anything other than map_sg, unmap_sg,
- * map_page, and unmap_page on highmem, use normal dma_ops
- * for everything else.
- */
-const struct dma_map_ops powerpc_swiotlb_dma_ops = {
- .alloc = __dma_nommu_alloc_coherent,
- .free = __dma_nommu_free_coherent,
- .mmap = dma_nommu_mmap_coherent,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
- .dma_supported = swiotlb_dma_supported,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .mapping_error = swiotlb_dma_mapping_error,
- .get_required_mask = swiotlb_powerpc_get_required,
-};
-
-void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
-{
- struct pci_controller *hose;
- struct dev_archdata *sd;
-
- hose = pci_bus_to_host(pdev->bus);
- sd = &pdev->dev.archdata;
- sd->max_direct_dma_addr =
- hose->dma_window_base_cur + hose->dma_window_size;
-}
-
-static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
- struct dev_archdata *sd;
-
- /* We are only intereted in device addition */
- if (action != BUS_NOTIFY_ADD_DEVICE)
- return 0;
-
- sd = &dev->archdata;
- sd->max_direct_dma_addr = 0;
-
- /* May need to bounce if the device can't address all of DRAM */
- if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM())
- set_dma_ops(dev, &powerpc_swiotlb_dma_ops);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ppc_swiotlb_plat_bus_notifier = {
- .notifier_call = ppc_swiotlb_bus_notify,
- .priority = 0,
-};
-
-int __init swiotlb_setup_bus_notifier(void)
-{
- bus_register_notifier(&platform_bus_type,
- &ppc_swiotlb_plat_bus_notifier);
- return 0;
-}
-
void __init swiotlb_detect_4g(void)
{
- if ((memblock_end_of_DRAM() - 1) > 0xffffffff) {
+ if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
ppc_swiotlb_enable = 1;
-#ifdef CONFIG_ZONE_DMA32
- limit_zone_pfn(ZONE_DMA32, (1ULL << 32) >> PAGE_SHIFT);
-#endif
- }
}
static int __init check_swiotlb_enabled(void)
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
deleted file mode 100644
index dbfc705..0000000
--- a/arch/powerpc/kernel/dma.c
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
- *
- * Provide default implementations of the DMA mapping callbacks for
- * directly mapped busses.
- */
-
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/dma-debug.h>
-#include <linux/gfp.h>
-#include <linux/memblock.h>
-#include <linux/export.h>
-#include <linux/pci.h>
-#include <asm/vio.h>
-#include <asm/bug.h>
-#include <asm/machdep.h>
-#include <asm/swiotlb.h>
-#include <asm/iommu.h>
-
-/*
- * Generic direct DMA implementation
- *
- * This implementation supports a per-device offset that can be applied if
- * the address at which memory is visible to devices is not 0. Platform code
- * can set archdata.dma_data to an unsigned long holding the offset. By
- * default the offset is PCI_DRAM_OFFSET.
- */
-
-static u64 __maybe_unused get_pfn_limit(struct device *dev)
-{
- u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1;
- struct dev_archdata __maybe_unused *sd = &dev->archdata;
-
-#ifdef CONFIG_SWIOTLB
- if (sd->max_direct_dma_addr && dev->dma_ops == &powerpc_swiotlb_dma_ops)
- pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT);
-#endif
-
- return pfn;
-}
-
-static int dma_nommu_dma_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_PPC64
- u64 limit = get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
-
- /* Limit fits in the mask, we are good */
- if (mask >= limit)
- return 1;
-
-#ifdef CONFIG_FSL_SOC
- /* Freescale gets another chance via ZONE_DMA/ZONE_DMA32, however
- * that will have to be refined if/when they support iommus
- */
- return 1;
-#endif
- /* Sorry ... */
- return 0;
-#else
- return 1;
-#endif
-}
-
-void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- void *ret;
-#ifdef CONFIG_NOT_COHERENT_CACHE
- ret = __dma_alloc_coherent(dev, size, dma_handle, flag);
- if (ret == NULL)
- return NULL;
- *dma_handle += get_dma_offset(dev);
- return ret;
-#else
- struct page *page;
- int node = dev_to_node(dev);
-#ifdef CONFIG_FSL_SOC
- u64 pfn = get_pfn_limit(dev);
- int zone;
-
- /*
- * This code should be OK on other platforms, but we have drivers that
- * don't set coherent_dma_mask. As a workaround we just ifdef it. This
- * whole routine needs some serious cleanup.
- */
-
- zone = dma_pfn_limit_to_zone(pfn);
- if (zone < 0) {
- dev_err(dev, "%s: No suitable zone for pfn %#llx\n",
- __func__, pfn);
- return NULL;
- }
-
- switch (zone) {
- case ZONE_DMA:
- flag |= GFP_DMA;
- break;
-#ifdef CONFIG_ZONE_DMA32
- case ZONE_DMA32:
- flag |= GFP_DMA32;
- break;
-#endif
- };
-#endif /* CONFIG_FSL_SOC */
-
- page = alloc_pages_node(node, flag, get_order(size));
- if (page == NULL)
- return NULL;
- ret = page_address(page);
- memset(ret, 0, size);
- *dma_handle = __pa(ret) + get_dma_offset(dev);
-
- return ret;
-#endif
-}
-
-void __dma_nommu_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
-#ifdef CONFIG_NOT_COHERENT_CACHE
- __dma_free_coherent(size, vaddr);
-#else
- free_pages((unsigned long)vaddr, get_order(size));
-#endif
-}
-
-static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- struct iommu_table *iommu;
-
- /* The coherent mask may be smaller than the real mask, check if
- * we can really use the direct ops
- */
- if (dma_nommu_dma_supported(dev, dev->coherent_dma_mask))
- return __dma_nommu_alloc_coherent(dev, size, dma_handle,
- flag, attrs);
-
- /* Ok we can't ... do we have an iommu ? If not, fail */
- iommu = get_iommu_table_base(dev);
- if (!iommu)
- return NULL;
-
- /* Try to use the iommu */
- return iommu_alloc_coherent(dev, iommu, size, dma_handle,
- dev->coherent_dma_mask, flag,
- dev_to_node(dev));
-}
-
-static void dma_nommu_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- struct iommu_table *iommu;
-
- /* See comments in dma_nommu_alloc_coherent() */
- if (dma_nommu_dma_supported(dev, dev->coherent_dma_mask))
- return __dma_nommu_free_coherent(dev, size, vaddr, dma_handle,
- attrs);
- /* Maybe we used an iommu ... */
- iommu = get_iommu_table_base(dev);
-
- /* If we hit that we should have never allocated in the first
- * place so how come we are freeing ?
- */
- if (WARN_ON(!iommu))
- return;
- iommu_free_coherent(iommu, size, vaddr, dma_handle);
-}
-
-int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t handle, size_t size,
- unsigned long attrs)
-{
- unsigned long pfn;
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr);
-#else
- pfn = page_to_pfn(virt_to_page(cpu_addr));
-#endif
- return remap_pfn_range(vma, vma->vm_start,
- pfn + vma->vm_pgoff,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot);
-}
-
-static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nents, i) {
- sg->dma_address = sg_phys(sg) + get_dma_offset(dev);
- sg->dma_length = sg->length;
-
- if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
- continue;
-
- __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
- }
-
- return nents;
-}
-
-static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
-}
-
-static u64 dma_nommu_get_required_mask(struct device *dev)
-{
- u64 end, mask;
-
- end = memblock_end_of_DRAM() + get_dma_offset(dev);
-
- mask = 1ULL << (fls64(end) - 1);
- mask += mask - 1;
-
- return mask;
-}
-
-static inline dma_addr_t dma_nommu_map_page(struct device *dev,
- struct page *page,
- unsigned long offset,
- size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- __dma_sync_page(page, offset, size, dir);
-
- return page_to_phys(page) + offset + get_dma_offset(dev);
-}
-
-static inline void dma_nommu_unmap_page(struct device *dev,
- dma_addr_t dma_address,
- size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
-}
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
-static inline void dma_nommu_sync_sg(struct device *dev,
- struct scatterlist *sgl, int nents,
- enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nents, i)
- __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
-}
-
-static inline void dma_nommu_sync_single(struct device *dev,
- dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction)
-{
- __dma_sync(bus_to_virt(dma_handle), size, direction);
-}
-#endif
-
-const struct dma_map_ops dma_nommu_ops = {
- .alloc = dma_nommu_alloc_coherent,
- .free = dma_nommu_free_coherent,
- .mmap = dma_nommu_mmap_coherent,
- .map_sg = dma_nommu_map_sg,
- .unmap_sg = dma_nommu_unmap_sg,
- .dma_supported = dma_nommu_dma_supported,
- .map_page = dma_nommu_map_page,
- .unmap_page = dma_nommu_unmap_page,
- .get_required_mask = dma_nommu_get_required_mask,
-#ifdef CONFIG_NOT_COHERENT_CACHE
- .sync_single_for_cpu = dma_nommu_sync_single,
- .sync_single_for_device = dma_nommu_sync_single,
- .sync_sg_for_cpu = dma_nommu_sync_sg,
- .sync_sg_for_device = dma_nommu_sync_sg,
-#endif
-};
-EXPORT_SYMBOL(dma_nommu_ops);
-
-int dma_set_coherent_mask(struct device *dev, u64 mask)
-{
- if (!dma_supported(dev, mask)) {
- /*
- * We need to special case the direct DMA ops which can
- * support a fallback for coherent allocations. There
- * is no dma_op->set_coherent_mask() so we have to do
- * things the hard way:
- */
- if (get_dma_ops(dev) != &dma_nommu_ops ||
- get_iommu_table_base(dev) == NULL ||
- !dma_iommu_dma_supported(dev, mask))
- return -EIO;
- }
- dev->coherent_dma_mask = mask;
- return 0;
-}
-EXPORT_SYMBOL(dma_set_coherent_mask);
-
-int dma_set_mask(struct device *dev, u64 dma_mask)
-{
- if (ppc_md.dma_set_mask)
- return ppc_md.dma_set_mask(dev, dma_mask);
-
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
- struct pci_controller *phb = pci_bus_to_host(pdev->bus);
- if (phb->controller_ops.dma_set_mask)
- return phb->controller_ops.dma_set_mask(pdev, dma_mask);
- }
-
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
- return -EIO;
- *dev->dma_mask = dma_mask;
- return 0;
-}
-EXPORT_SYMBOL(dma_set_mask);
-
-u64 __dma_get_required_mask(struct device *dev)
-{
- const struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if (unlikely(dma_ops == NULL))
- return 0;
-
- if (dma_ops->get_required_mask)
- return dma_ops->get_required_mask(dev);
-
- return DMA_BIT_MASK(8 * sizeof(dma_addr_t));
-}
-
-u64 dma_get_required_mask(struct device *dev)
-{
- if (ppc_md.dma_get_required_mask)
- return ppc_md.dma_get_required_mask(dev);
-
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
- struct pci_controller *phb = pci_bus_to_host(pdev->bus);
- if (phb->controller_ops.dma_get_required_mask)
- return phb->controller_ops.dma_get_required_mask(pdev);
- }
-
- return __dma_get_required_mask(dev);
-}
-EXPORT_SYMBOL_GPL(dma_get_required_mask);
-
-static int __init dma_init(void)
-{
-#ifdef CONFIG_IBMVIO
- dma_debug_add_bus(&vio_bus_type);
-#endif
-
- return 0;
-}
-fs_initcall(dma_init);
-
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index f432054..180b3a5 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2017, Nicholas Piggin, IBM Corporation
- * Licensed under GPLv2.
*/
#define pr_fmt(fmt) "dt-cpu-ftrs: " fmt
@@ -101,7 +101,7 @@
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
- mtspr(SPRN_PCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
}
mtspr(SPRN_FSCR, system_registers.fscr);
@@ -144,6 +144,7 @@
mtspr(SPRN_HFSCR, 0);
}
mtspr(SPRN_FSCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
/*
* LPCR does not get cleared, to match behaviour with secondaries
@@ -666,8 +667,10 @@
m = &dt_cpu_feature_match_table[i];
if (!strcmp(f->name, m->name)) {
known = true;
- if (m->enable(f))
+ if (m->enable(f)) {
+ cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask;
break;
+ }
pr_info("not enabling: %s (disabled or unsupported by kernel)\n",
f->name);
@@ -675,17 +678,12 @@
}
}
- if (!known && enable_unknown) {
- if (!feat_try_enable_unknown(f)) {
- pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
- f->name);
- return false;
- }
+ if (!known && (!enable_unknown || !feat_try_enable_unknown(f))) {
+ pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
+ f->name);
+ return false;
}
- if (m->cpu_ftr_bit_mask)
- cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask;
-
if (known)
pr_debug("enabling: %s\n", f->name);
else
@@ -694,9 +692,37 @@
return true;
}
+/*
+ * Handle POWER9 broadcast tlbie invalidation issue using
+ * cpu feature flag.
+ */
+static __init void update_tlbie_feature_flag(unsigned long pvr)
+{
+ if (PVR_VER(pvr) == PVR_POWER9) {
+ /*
+ * Set the tlbie feature flag for anything below
+ * Nimbus DD 2.3 and Cumulus DD 1.3
+ */
+ if ((pvr & 0xe000) == 0) {
+ /* Nimbus */
+ if ((pvr & 0xfff) < 0x203)
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ } else if ((pvr & 0xc000) == 0) {
+ /* Cumulus */
+ if ((pvr & 0xfff) < 0x103)
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ } else {
+ WARN_ONCE(1, "Unknown PVR");
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ }
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
+ }
+}
+
static __init void cpufeatures_cpu_quirks(void)
{
- int version = mfspr(SPRN_PVR);
+ unsigned long version = mfspr(SPRN_PVR);
/*
* Not all quirks can be derived from the cpufeatures device tree.
@@ -715,10 +741,10 @@
if ((version & 0xffff0000) == 0x004e0000) {
cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
}
+ update_tlbie_feature_flag(version);
/*
* PKEY was not in the initial base or feature node
* specification, but it should become optional in the next
@@ -813,7 +839,6 @@
int len;
f = &dt_cpu_features[i];
- memset(f, 0, sizeof(struct dt_cpu_feature));
f->node = node;
@@ -1008,9 +1033,12 @@
/* Count and allocate space for cpu features */
of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes,
&nr_dt_cpu_features);
- dt_cpu_features = __va(
- memblock_alloc(sizeof(struct dt_cpu_feature)*
- nr_dt_cpu_features, PAGE_SIZE));
+ dt_cpu_features = memblock_alloc(sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, PAGE_SIZE);
+ if (!dt_cpu_features)
+ panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
+ __func__,
+ sizeof(struct dt_cpu_feature) * nr_dt_cpu_features,
+ PAGE_SIZE);
cpufeatures_setup_start(isa);
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
new file mode 100644
index 0000000..3482118
--- /dev/null
+++ b/arch/powerpc/kernel/early_32.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Early init before relocation
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/asm-prototypes.h>
+
+/*
+ * We're called here very early in the boot.
+ *
+ * Note that the kernel may be running at an address which is different
+ * from the address that it was linked at, so we must use RELOC/PTRRELOC
+ * to access static data (including strings). -- paulus
+ */
+notrace unsigned long __init early_init(unsigned long dt_ptr)
+{
+ unsigned long offset = reloc_offset();
+
+ /* First zero the BSS */
+ memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
+
+ /*
+ * Identify the CPU type and fix up code sections
+ * that depend on which cpu we have.
+ */
+ identify_cpu(offset, mfspr(SPRN_PVR));
+
+ apply_feature_fixups();
+
+ return KERNELBASE + offset;
+}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index c72767a..bc8a551 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright IBM Corporation 2001, 2005, 2006
* Copyright Dave Engebretsen & Todd Inglett 2001
* Copyright Linas Vepstas 2005, 2006
* Copyright 2001-2012 IBM Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
*/
@@ -109,7 +96,14 @@
* frozen count in last hour exceeds this limit, the PE will
* be forced to be offline permanently.
*/
-int eeh_max_freezes = 5;
+u32 eeh_max_freezes = 5;
+
+/*
+ * Controls whether a recovery event should be scheduled when an
+ * isolated device is discovered. This is only really useful for
+ * debugging problems with the EEH core.
+ */
+bool eeh_debugfs_no_recover;
/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;
@@ -156,6 +150,16 @@
}
__setup("eeh=", eeh_setup);
+void eeh_show_enabled(void)
+{
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
+ pr_info("EEH: Recovery disabled by kernel parameter.\n");
+ else if (eeh_has_flag(EEH_ENABLED))
+ pr_info("EEH: Capable adapter found: recovery enabled.\n");
+ else
+ pr_info("EEH: No capable adapters found: recovery disabled.\n");
+}
+
/*
* This routine captures assorted PCI configuration space data
* for the indicated PCI device, and puts them into a buffer
@@ -360,10 +364,19 @@
ptep = find_init_mm_pte(token, &hugepage_shift);
if (!ptep)
return token;
- WARN_ON(hugepage_shift);
- pa = pte_pfn(*ptep) << PAGE_SHIFT;
- return pa | (token & (PAGE_SIZE-1));
+ pa = pte_pfn(*ptep);
+
+ /* On radix we can do hugepage mappings for io, so handle that */
+ if (hugepage_shift) {
+ pa <<= hugepage_shift;
+ pa |= token & ((1ul << hugepage_shift) - 1);
+ } else {
+ pa <<= PAGE_SHIFT;
+ pa |= token & (PAGE_SIZE - 1);
+ }
+
+ return pa;
}
/*
@@ -404,14 +417,12 @@
}
/* Isolate the PHB and send event */
- eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(phb_pe);
eeh_serialize_unlock(flags);
- pr_err("EEH: PHB#%x failure detected, location: %s\n",
+ pr_debug("EEH: PHB#%x failure detected, location: %s\n",
phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
- dump_stack();
eeh_send_failure_event(phb_pe);
-
return 1;
out:
eeh_serialize_unlock(flags);
@@ -438,7 +449,7 @@
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
- struct eeh_pe *pe, *parent_pe, *phb_pe;
+ struct eeh_pe *pe, *parent_pe;
int rc = 0;
const char *location = NULL;
@@ -457,8 +468,7 @@
/* Access to IO BARs might get this far and still not want checking. */
if (!pe) {
eeh_stats.ignored_check++;
- pr_debug("EEH: Ignored check for %s\n",
- eeh_pci_name(dev));
+ eeh_edev_dbg(edev, "Ignored check\n");
return 0;
}
@@ -498,12 +508,11 @@
if (dn)
location = of_get_property(dn, "ibm,loc-code",
NULL);
- printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
- "location=%s driver=%s pci addr=%s\n",
+ eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n",
pe->check_count,
location ? location : "unknown",
- eeh_driver_name(dev), eeh_pci_name(dev));
- printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+ eeh_driver_name(dev));
+ eeh_edev_err(edev, "Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
dump_stack();
}
@@ -563,20 +572,15 @@
* with other functions on this device, and functions under
* bridges.
*/
- eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
eeh_serialize_unlock(flags);
/* Most EEH events are due to device driver bugs. Having
* a stack trace will help the device-driver authors figure
* out what happened. So print that out.
*/
- phb_pe = eeh_phb_pe_get(pe->phb);
- pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
- pe->phb->global_number, pe->addr);
- pr_err("EEH: PE location: %s, PHB location: %s\n",
- eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
- dump_stack();
-
+ pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
+ __func__, pe->phb->global_number, pe->addr);
eeh_send_failure_event(pe);
return 1;
@@ -681,7 +685,7 @@
/* Check if the request is finished successfully */
if (active_flag) {
- rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+ rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
if (rc < 0)
return rc;
@@ -694,7 +698,7 @@
return rc;
}
-static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
+static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
void *userdata)
{
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
@@ -705,7 +709,7 @@
* state for the specified device
*/
if (!pdev || pdev == dev)
- return NULL;
+ return;
/* Ensure we have D0 power state */
pci_set_power_state(pdev, PCI_D0);
@@ -718,18 +722,16 @@
* interrupt from the device
*/
pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
-
- return NULL;
}
-static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
+static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
struct pci_dev *dev = userdata;
if (!pdev)
- return NULL;
+ return;
/* Apply customization from firmware */
if (pdn && eeh_ops->restore_config)
@@ -738,8 +740,6 @@
/* The caller should restore state for the specified device */
if (pdev != dev)
pci_restore_state(pdev);
-
- return NULL;
}
int eeh_restore_vf_config(struct pci_dn *pdn)
@@ -823,14 +823,15 @@
switch (state) {
case pcie_deassert_reset:
eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
- eeh_unfreeze_pe(pe, false);
+ eeh_unfreeze_pe(pe);
if (!(pe->type & EEH_PE_VF))
- eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
break;
case pcie_hot_reset:
- eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
if (!(pe->type & EEH_PE_VF))
@@ -838,7 +839,8 @@
eeh_ops->reset(pe, EEH_RESET_HOT);
break;
case pcie_warm_reset:
- eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
if (!(pe->type & EEH_PE_VF))
@@ -846,7 +848,7 @@
eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
break;
default:
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
return -EINVAL;
};
@@ -863,7 +865,7 @@
* the indicated device and its children so that the bunch of the
* devices could be reset properly.
*/
-static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
+static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
{
struct pci_dev *dev;
unsigned int *freset = (unsigned int *)flag;
@@ -871,8 +873,24 @@
dev = eeh_dev_to_pci_dev(edev);
if (dev)
*freset |= dev->needs_freset;
+}
- return NULL;
+static void eeh_pe_refreeze_passed(struct eeh_pe *root)
+{
+ struct eeh_pe *pe;
+ int state;
+
+ eeh_for_each_pe(root, pe) {
+ if (eeh_pe_passed(pe)) {
+ state = eeh_ops->get_state(pe, NULL);
+ if (state &
+ (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) {
+ pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n",
+ pe->phb->global_number, pe->addr);
+ eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
+ }
+ }
+ }
}
/**
@@ -887,12 +905,12 @@
*
* This function will attempt to reset a PE three times before failing.
*/
-int eeh_pe_reset_full(struct eeh_pe *pe)
+int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
{
int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
int type = EEH_RESET_HOT;
unsigned int freset = 0;
- int i, state, ret;
+ int i, state = 0, ret;
/*
* Determine the type of reset to perform - hot or fundamental.
@@ -909,33 +927,42 @@
/* Make three attempts at resetting the bus */
for (i = 0; i < 3; i++) {
- ret = eeh_pe_reset(pe, type);
- if (ret)
- break;
-
- ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
- if (ret)
- break;
+ ret = eeh_pe_reset(pe, type, include_passed);
+ if (!ret)
+ ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE,
+ include_passed);
+ if (ret) {
+ ret = -EIO;
+ pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n\n",
+ state, pe->phb->global_number, pe->addr, i + 1);
+ continue;
+ }
+ if (i)
+ pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n",
+ pe->phb->global_number, pe->addr, i + 1);
/* Wait until the PE is in a functioning state */
- state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if (eeh_state_active(state))
- break;
-
+ state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
if (state < 0) {
- pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x",
- __func__, pe->phb->global_number, pe->addr);
+ pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x",
+ pe->phb->global_number, pe->addr);
ret = -ENOTRECOVERABLE;
break;
}
-
- /* Set error in case this is our last attempt */
- ret = -EIO;
- pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n",
- __func__, state, pe->phb->global_number, pe->addr, (i + 1));
+ if (eeh_state_active(state))
+ break;
+ else
+ pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n",
+ pe->phb->global_number, pe->addr, state, i + 1);
}
- eeh_pe_state_clear(pe, reset_state);
+ /* Resetting the PE may have unfrozen child PEs. If those PEs have been
+ * (potentially) passed through to a guest, re-freeze them:
+ */
+ if (!include_passed)
+ eeh_pe_refreeze_passed(pe);
+
+ eeh_pe_state_clear(pe, reset_state, true);
return ret;
}
@@ -1031,18 +1058,6 @@
.notifier_call = eeh_reboot_notifier,
};
-void eeh_probe_devices(void)
-{
- struct pci_controller *hose, *tmp;
- struct pci_dn *pdn;
-
- /* Enable EEH for all adapters */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- pdn = hose->pci_data;
- traverse_pci_dn(pdn, eeh_ops->probe, NULL);
- }
-}
-
/**
* eeh_init - EEH initialization
*
@@ -1083,19 +1098,10 @@
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
eeh_dev_phb_init_dynamic(hose);
+ eeh_addr_cache_init();
+
/* Initialize EEH event */
- ret = eeh_event_init();
- if (ret)
- return ret;
-
- eeh_probe_devices();
-
- if (eeh_enabled())
- pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
- else if (!eeh_has_flag(EEH_POSTPONED_PROBE))
- pr_info("EEH: No capable adapters found\n");
-
- return ret;
+ return eeh_event_init();
}
core_initcall_sync(eeh_init);
@@ -1164,15 +1170,14 @@
struct pci_dn *pdn;
struct eeh_dev *edev;
- if (!dev || !eeh_enabled())
+ if (!dev)
return;
- pr_debug("EEH: Adding device %s\n", pci_name(dev));
-
pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
edev = pdn_to_eeh_dev(pdn);
+ eeh_edev_dbg(edev, "Adding device\n");
if (edev->pdev == dev) {
- pr_debug("EEH: Already referenced !\n");
+ eeh_edev_dbg(edev, "Device already referenced!\n");
return;
}
@@ -1220,6 +1225,8 @@
{
struct pci_dev *dev;
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
+ return;
list_for_each_entry(dev, &bus->devices, bus_list) {
eeh_add_device_late(dev);
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
@@ -1273,10 +1280,10 @@
edev = pci_dev_to_eeh_dev(dev);
/* Unregister the device with the EEH/PCI address search system */
- pr_debug("EEH: Removing device %s\n", pci_name(dev));
+ dev_dbg(&dev->dev, "EEH: Removing device\n");
if (!edev || !edev->pdev || !edev->pe) {
- pr_debug("EEH: Not referenced !\n");
+ dev_dbg(&dev->dev, "EEH: Device not referenced!\n");
return;
}
@@ -1314,7 +1321,7 @@
edev->mode &= ~EEH_DEV_SYSFS;
}
-int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
+int eeh_unfreeze_pe(struct eeh_pe *pe)
{
int ret;
@@ -1332,10 +1339,6 @@
return ret;
}
- /* Clear software isolated state */
- if (sw_state && (pe->state & EEH_PE_ISOLATED))
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
-
return ret;
}
@@ -1387,7 +1390,10 @@
}
}
- return eeh_unfreeze_pe(pe, true);
+ ret = eeh_unfreeze_pe(pe);
+ if (!ret)
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
+ return ret;
}
/**
@@ -1477,7 +1483,7 @@
if (!dev)
return 0;
- if (dev->iommu_group) {
+ if (device_iommu_mapped(dev)) {
*ppdev = pdev;
return 1;
}
@@ -1617,13 +1623,12 @@
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);
-static int eeh_pe_reenable_devices(struct eeh_pe *pe)
+static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed)
{
struct eeh_dev *edev, *tmp;
struct pci_dev *pdev;
int ret = 0;
- /* Restore config space */
eeh_pe_restore_bars(pe);
/*
@@ -1644,7 +1649,14 @@
}
/* The PE is still in frozen state */
- return eeh_unfreeze_pe(pe, true);
+ if (include_passed || !eeh_pe_passed(pe)) {
+ ret = eeh_unfreeze_pe(pe);
+ } else
+ pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n",
+ pe->phb->global_number, pe->addr);
+ if (!ret)
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed);
+ return ret;
}
@@ -1657,7 +1669,7 @@
* indicated type, either fundamental reset or hot reset.
* PE reset is the most important part for error recovery.
*/
-int eeh_pe_reset(struct eeh_pe *pe, int option)
+int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed)
{
int ret = 0;
@@ -1671,11 +1683,11 @@
switch (option) {
case EEH_RESET_DEACTIVATE:
ret = eeh_ops->reset(pe, option);
- eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed);
if (ret)
break;
- ret = eeh_pe_reenable_devices(pe);
+ ret = eeh_pe_reenable_devices(pe, include_passed);
break;
case EEH_RESET_HOT:
case EEH_RESET_FUNDAMENTAL:
@@ -1801,22 +1813,256 @@
return 0;
}
-static int eeh_freeze_dbgfs_set(void *data, u64 val)
+DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
+ eeh_enable_dbgfs_set, "0x%llx\n");
+
+static ssize_t eeh_force_recover_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
{
- eeh_max_freezes = val;
+ struct pci_controller *hose;
+ uint32_t phbid, pe_no;
+ struct eeh_pe *pe;
+ char buf[20];
+ int ret;
+
+ ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ /*
+ * When PE is NULL the event is a "special" event. Rather than
+ * recovering a specific PE it forces the EEH core to scan for failed
+ * PHBs and recovers each. This needs to be done before any device
+ * recoveries can occur.
+ */
+ if (!strncmp(buf, "hwcheck", 7)) {
+ __eeh_send_failure_event(NULL);
+ return count;
+ }
+
+ ret = sscanf(buf, "%x:%x", &phbid, &pe_no);
+ if (ret != 2)
+ return -EINVAL;
+
+ hose = pci_find_controller_for_domain(phbid);
+ if (!hose)
+ return -ENODEV;
+
+ /* Retrieve PE */
+ pe = eeh_pe_get(hose, pe_no, 0);
+ if (!pe)
+ return -ENODEV;
+
+ /*
+ * We don't do any state checking here since the detection
+ * process is async to the recovery process. The recovery
+ * thread *should* not break even if we schedule a recovery
+ * from an odd state (e.g. PE removed, or recovery of a
+ * non-isolated PE)
+ */
+ __eeh_send_failure_event(pe);
+
+ return ret < 0 ? ret : count;
+}
+
+static const struct file_operations eeh_force_recover_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_force_recover_write,
+};
+
+static ssize_t eeh_debugfs_dev_usage(struct file *filp,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n";
+
+ return simple_read_from_buffer(user_buf, count, ppos,
+ usage, sizeof(usage) - 1);
+}
+
+static ssize_t eeh_dev_check_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ uint32_t domain, bus, dev, fn;
+ struct pci_dev *pdev;
+ struct eeh_dev *edev;
+ char buf[20];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+ if (ret != 4) {
+ pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+ return -EINVAL;
+ }
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+ if (!pdev)
+ return -ENODEV;
+
+ edev = pci_dev_to_eeh_dev(pdev);
+ if (!edev) {
+ pci_err(pdev, "No eeh_dev for this device!\n");
+ pci_dev_put(pdev);
+ return -ENODEV;
+ }
+
+ ret = eeh_dev_check_failure(edev);
+ pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n",
+ domain, bus, dev, fn, ret);
+
+ pci_dev_put(pdev);
+
+ return count;
+}
+
+static const struct file_operations eeh_dev_check_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_dev_check_write,
+ .read = eeh_debugfs_dev_usage,
+};
+
+static int eeh_debugfs_break_device(struct pci_dev *pdev)
+{
+ struct resource *bar = NULL;
+ void __iomem *mapped;
+ u16 old, bit;
+ int i, pos;
+
+ /* Do we have an MMIO BAR to disable? */
+ for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+ struct resource *r = &pdev->resource[i];
+
+ if (!r->flags || !r->start)
+ continue;
+ if (r->flags & IORESOURCE_IO)
+ continue;
+ if (r->flags & IORESOURCE_UNSET)
+ continue;
+
+ bar = r;
+ break;
+ }
+
+ if (!bar) {
+ pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
+ return -ENXIO;
+ }
+
+ pci_err(pdev, "Going to break: %pR\n", bar);
+
+ if (pdev->is_virtfn) {
+#ifndef CONFIG_PCI_IOV
+ return -ENXIO;
+#else
+ /*
+ * VFs don't have a per-function COMMAND register, so the best
+ * we can do is clear the Memory Space Enable bit in the PF's
+ * SRIOV control reg.
+ *
+ * Unfortunately, this requires that we have a PF (i.e doesn't
+ * work for a passed-through VF) and it has the potential side
+ * effect of also causing an EEH on every other VF under the
+ * PF. Oh well.
+ */
+ pdev = pdev->physfn;
+ if (!pdev)
+ return -ENXIO; /* passed through VFs have no PF */
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+ pos += PCI_SRIOV_CTRL;
+ bit = PCI_SRIOV_CTRL_MSE;
+#endif /* !CONFIG_PCI_IOV */
+ } else {
+ bit = PCI_COMMAND_MEMORY;
+ pos = PCI_COMMAND;
+ }
+
+ /*
+ * Process here is:
+ *
+ * 1. Disable Memory space.
+ *
+ * 2. Perform an MMIO to the device. This should result in an error
+ * (CA / UR) being raised by the device which results in an EEH
+ * PE freeze. Using the in_8() accessor skips the eeh detection hook
+ * so the freeze hook so the EEH Detection machinery won't be
+ * triggered here. This is to match the usual behaviour of EEH
+ * where the HW will asyncronously freeze a PE and it's up to
+ * the kernel to notice and deal with it.
+ *
+ * 3. Turn Memory space back on. This is more important for VFs
+ * since recovery will probably fail if we don't. For normal
+ * the COMMAND register is reset as a part of re-initialising
+ * the device.
+ *
+ * Breaking stuff is the point so who cares if it's racy ;)
+ */
+ pci_read_config_word(pdev, pos, &old);
+
+ mapped = ioremap(bar->start, PAGE_SIZE);
+ if (!mapped) {
+ pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
+ return -ENXIO;
+ }
+
+ pci_write_config_word(pdev, pos, old & ~bit);
+ in_8(mapped);
+ pci_write_config_word(pdev, pos, old);
+
+ iounmap(mapped);
+
return 0;
}
-static int eeh_freeze_dbgfs_get(void *data, u64 *val)
+static ssize_t eeh_dev_break_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
{
- *val = eeh_max_freezes;
- return 0;
+ uint32_t domain, bus, dev, fn;
+ struct pci_dev *pdev;
+ char buf[20];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+ if (ret != 4) {
+ pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+ return -EINVAL;
+ }
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+ if (!pdev)
+ return -ENODEV;
+
+ ret = eeh_debugfs_break_device(pdev);
+ pci_dev_put(pdev);
+
+ if (ret < 0)
+ return ret;
+
+ return count;
}
-DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
- eeh_enable_dbgfs_set, "0x%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
- eeh_freeze_dbgfs_set, "0x%llx\n");
+static const struct file_operations eeh_dev_break_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_dev_break_write,
+ .read = eeh_debugfs_dev_usage,
+};
+
#endif
static int __init eeh_init_proc(void)
@@ -1824,12 +2070,24 @@
if (machine_is(pseries) || machine_is(powernv)) {
proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
#ifdef CONFIG_DEBUG_FS
- debugfs_create_file("eeh_enable", 0600,
- powerpc_debugfs_root, NULL,
- &eeh_enable_dbgfs_ops);
- debugfs_create_file("eeh_max_freezes", 0600,
- powerpc_debugfs_root, NULL,
- &eeh_freeze_dbgfs_ops);
+ debugfs_create_file_unsafe("eeh_enable", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_enable_dbgfs_ops);
+ debugfs_create_u32("eeh_max_freezes", 0600,
+ powerpc_debugfs_root, &eeh_max_freezes);
+ debugfs_create_bool("eeh_disable_recovery", 0600,
+ powerpc_debugfs_root,
+ &eeh_debugfs_no_recover);
+ debugfs_create_file_unsafe("eeh_dev_check", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_dev_check_fops);
+ debugfs_create_file_unsafe("eeh_dev_break", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_dev_break_fops);
+ debugfs_create_file_unsafe("eeh_force_recover", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_force_recover_fops);
+ eeh_cache_debugfs_init();
#endif
}
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 201943d..cf11277 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PCI address cache; allows the lookup of PCI devices based on I/O address
*
* Copyright IBM Corporation 2004
* Copyright Linas Vepstas <linas@austin.ibm.com> 2004
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/list.h>
@@ -26,10 +13,13 @@
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <asm/pci-bridge.h>
+#include <asm/debugfs.h>
#include <asm/ppc-pci.h>
/**
+ * DOC: Overview
+ *
* The pci address cache subsystem. This subsystem places
* PCI device address resources into a red-black tree, sorted
* according to the address range, so that given only an i/o
@@ -46,6 +36,7 @@
* than any hash algo I could think of for this problem, even
* with the penalty of slow pointer chases for d-cache misses).
*/
+
struct pci_io_addr_range {
struct rb_node rb_node;
resource_size_t addr_lo;
@@ -113,7 +104,7 @@
while (n) {
struct pci_io_addr_range *piar;
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
- pr_debug("PCI: %s addr range %d [%pap-%pap]: %s\n",
+ pr_info("PCI: %s addr range %d [%pap-%pap]: %s\n",
(piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
&piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
cnt++;
@@ -157,10 +148,8 @@
piar->pcidev = dev;
piar->flags = flags;
-#ifdef DEBUG
- pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n",
- &alo, &ahi, pci_name(dev));
-#endif
+ eeh_edev_dbg(piar->edev, "PIAR: insert range=[%pap:%pap]\n",
+ &alo, &ahi);
rb_link_node(&piar->rb_node, parent, p);
rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
@@ -240,6 +229,8 @@
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
if (piar->pcidev == dev) {
+ eeh_edev_dbg(piar->edev, "PIAR: remove range=[%pap:%pap]\n",
+ &piar->addr_lo, &piar->addr_hi);
rb_erase(n, &pci_io_addr_cache_root.rb_root);
kfree(piar);
goto restart;
@@ -267,40 +258,38 @@
}
/**
- * eeh_addr_cache_build - Build a cache of I/O addresses
+ * eeh_addr_cache_init - Initialize a cache of I/O addresses
*
- * Build a cache of pci i/o addresses. This cache will be used to
+ * Initialize a cache of pci i/o addresses. This cache will be used to
* find the pci device that corresponds to a given address.
- * This routine scans all pci busses to build the cache.
- * Must be run late in boot process, after the pci controllers
- * have been scanned for devices (after all device resources are known).
*/
-void eeh_addr_cache_build(void)
+void eeh_addr_cache_init(void)
{
- struct pci_dn *pdn;
- struct eeh_dev *edev;
- struct pci_dev *dev = NULL;
-
spin_lock_init(&pci_io_addr_cache_root.piar_lock);
+}
- for_each_pci_dev(dev) {
- pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
- if (!pdn)
- continue;
+static int eeh_addr_cache_show(struct seq_file *s, void *v)
+{
+ struct pci_io_addr_range *piar;
+ struct rb_node *n;
- edev = pdn_to_eeh_dev(pdn);
- if (!edev)
- continue;
+ spin_lock(&pci_io_addr_cache_root.piar_lock);
+ for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) {
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
- dev->dev.archdata.edev = edev;
- edev->pdev = dev;
-
- eeh_addr_cache_insert_dev(dev);
- eeh_sysfs_add_device(dev);
+ seq_printf(s, "%s addr range [%pap-%pap]: %s\n",
+ (piar->flags & IORESOURCE_IO) ? "i/o" : "mem",
+ &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
}
+ spin_unlock(&pci_io_addr_cache_root.piar_lock);
-#ifdef DEBUG
- /* Verify tree built up above, echo back the list of addrs. */
- eeh_addr_cache_print(&pci_io_addr_cache_root);
-#endif
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache);
+
+void eeh_cache_debugfs_init(void)
+{
+ debugfs_create_file_unsafe("eeh_address_cache", 0400,
+ powerpc_debugfs_root, NULL,
+ &eeh_addr_cache_fops);
}
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index a34e691..7370185 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The file intends to implement dynamic creation of EEH device, which will
* be bound with OF node and PCI device simutaneously. The EEH devices would
@@ -15,20 +16,6 @@
* PHB is newly inserted, we also need create EEH devices accordingly.
*
* Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/export.h>
@@ -60,8 +47,8 @@
/* Associate EEH device with OF node */
pdn->edev = edev;
edev->pdn = pdn;
- INIT_LIST_HEAD(&edev->list);
- INIT_LIST_HEAD(&edev->rmv_list);
+ edev->bdfn = (pdn->busno << 8) | pdn->devfn;
+ edev->controller = pdn->phb;
return edev;
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 67619b4..d9279d0 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -27,6 +27,7 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>
@@ -35,8 +36,8 @@
#include <asm/rtas.h>
struct eeh_rmv_data {
- struct list_head edev_list;
- int removed;
+ struct list_head removed_vf_list;
+ int removed_dev_count;
};
static int eeh_result_priority(enum pci_ers_result result)
@@ -60,7 +61,7 @@
}
};
-const char *pci_ers_result_name(enum pci_ers_result result)
+static const char *pci_ers_result_name(enum pci_ers_result result)
{
switch (result) {
case PCI_ERS_RESULT_NONE:
@@ -81,23 +82,6 @@
}
};
-static __printf(2, 3) void eeh_edev_info(const struct eeh_dev *edev,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n", edev->pe_config_addr,
- edev->pdev ? dev_name(&edev->pdev->dev) : "none", &vaf);
-
- va_end(args);
-}
-
static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
enum pci_ers_result new)
{
@@ -113,8 +97,16 @@
static bool eeh_edev_actionable(struct eeh_dev *edev)
{
- return (edev->pdev && !eeh_dev_removed(edev) &&
- !eeh_pe_passed(edev->pe));
+ if (!edev->pdev)
+ return false;
+ if (edev->pdev->error_state == pci_channel_io_perm_failure)
+ return false;
+ if (eeh_dev_removed(edev))
+ return false;
+ if (eeh_pe_passed(edev->pe))
+ return false;
+
+ return true;
}
/**
@@ -214,12 +206,12 @@
}
}
-static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
+static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
- return NULL;
+ return;
/*
* We cannot access the config space on some adapters.
@@ -229,14 +221,13 @@
* device is created.
*/
if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
- return NULL;
+ return;
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
- return NULL;
+ return;
pci_save_state(pdev);
- return NULL;
}
static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
@@ -274,16 +265,27 @@
}
typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
+ struct pci_dev *,
struct pci_driver *);
static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
enum pci_ers_result *result)
{
+ struct pci_dev *pdev;
struct pci_driver *driver;
enum pci_ers_result new_result;
- device_lock(&edev->pdev->dev);
+ pci_lock_rescan_remove();
+ pdev = edev->pdev;
+ if (pdev)
+ get_device(&pdev->dev);
+ pci_unlock_rescan_remove();
+ if (!pdev) {
+ eeh_edev_info(edev, "no device");
+ return;
+ }
+ device_lock(&pdev->dev);
if (eeh_edev_actionable(edev)) {
- driver = eeh_pcid_get(edev->pdev);
+ driver = eeh_pcid_get(pdev);
if (!driver)
eeh_edev_info(edev, "no driver");
@@ -292,7 +294,7 @@
else if (edev->mode & EEH_DEV_NO_HANDLER)
eeh_edev_info(edev, "driver bound too late");
else {
- new_result = fn(edev, driver);
+ new_result = fn(edev, pdev, driver);
eeh_edev_info(edev, "%s driver reports: '%s'",
driver->name,
pci_ers_result_name(new_result));
@@ -301,12 +303,15 @@
new_result);
}
if (driver)
- eeh_pcid_put(edev->pdev);
+ eeh_pcid_put(pdev);
} else {
- eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!edev->pdev,
+ eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!pdev,
!eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
}
- device_unlock(&edev->pdev->dev);
+ device_unlock(&pdev->dev);
+ if (edev->pdev != pdev)
+ eeh_edev_warn(edev, "Device changed during processing!\n");
+ put_device(&pdev->dev);
}
static void eeh_pe_report(const char *name, struct eeh_pe *root,
@@ -333,20 +338,20 @@
* Report an EEH error to each device driver.
*/
static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
- struct pci_dev *dev = edev->pdev;
if (!driver->err_handler->error_detected)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
driver->name);
- rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
+ rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);
edev->in_error = true;
- pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);
return rc;
}
@@ -359,12 +364,13 @@
* are now enabled.
*/
static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->mmio_enabled)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
- return driver->err_handler->mmio_enabled(edev->pdev);
+ return driver->err_handler->mmio_enabled(pdev);
}
/**
@@ -378,20 +384,21 @@
* driver can work again while the device is recovered.
*/
static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->slot_reset || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
- return driver->err_handler->slot_reset(edev->pdev);
+ return driver->err_handler->slot_reset(pdev);
}
-static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
+static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
- return NULL;
+ return;
/*
* The content in the config space isn't saved because
@@ -400,18 +407,17 @@
* EEH device is created.
*/
if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
- if (list_is_last(&edev->list, &edev->pe->edevs))
+ if (list_is_last(&edev->entry, &edev->pe->edevs))
eeh_pe_restore_bars(edev->pe);
- return NULL;
+ return;
}
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
- return NULL;
+ return;
pci_restore_state(pdev);
- return NULL;
}
/**
@@ -424,13 +430,14 @@
* to make the recovered device work again.
*/
static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->resume || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
- driver->err_handler->resume(edev->pdev);
+ driver->err_handler->resume(pdev);
pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
@@ -449,6 +456,7 @@
* dead, and that no further recovery attempts will be made on it.
*/
static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
@@ -458,24 +466,20 @@
eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
driver->name);
- rc = driver->err_handler->error_detected(edev->pdev,
+ rc = driver->err_handler->error_detected(pdev,
pci_channel_io_perm_failure);
- pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
return rc;
}
-static void *eeh_add_virt_device(void *data, void *userdata)
+static void *eeh_add_virt_device(struct eeh_dev *edev)
{
struct pci_driver *driver;
- struct eeh_dev *edev = (struct eeh_dev *)data;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
if (!(edev->physfn)) {
- pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
- __func__, pdn->phb->global_number, pdn->busno,
- PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+ eeh_edev_warn(edev, "Not for VF\n");
return NULL;
}
@@ -489,17 +493,16 @@
}
#ifdef CONFIG_PCI_IOV
- pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
+ pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index);
#endif
return NULL;
}
-static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
+static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
- int *removed = rmv_data ? &rmv_data->removed : NULL;
/*
* Actually, we should remove the PCI bridges as well.
@@ -508,41 +511,29 @@
* support EEH. So we just care about PCI devices for
* simplicity here.
*/
- if (!dev || (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
- return NULL;
+ if (!eeh_edev_actionable(edev) ||
+ (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
+ return;
- /*
- * We rely on count-based pcibios_release_device() to
- * detach permanently offlined PEs. Unfortunately, that's
- * not reliable enough. We might have the permanently
- * offlined PEs attached, but we needn't take care of
- * them and their child devices.
- */
- if (eeh_dev_removed(edev))
- return NULL;
-
- if (removed) {
- if (eeh_pe_passed(edev->pe))
- return NULL;
+ if (rmv_data) {
driver = eeh_pcid_get(dev);
if (driver) {
if (driver->err_handler &&
driver->err_handler->error_detected &&
driver->err_handler->slot_reset) {
eeh_pcid_put(dev);
- return NULL;
+ return;
}
eeh_pcid_put(dev);
}
}
/* Remove it from PCI subsystem */
- pr_debug("EEH: Removing %s without EEH sensitive driver\n",
- pci_name(dev));
- edev->bus = dev->bus;
+ pr_info("EEH: Removing %s without EEH sensitive driver\n",
+ pci_name(dev));
edev->mode |= EEH_DEV_DISCONNECTED;
- if (removed)
- (*removed)++;
+ if (rmv_data)
+ rmv_data->removed_dev_count++;
if (edev->physfn) {
#ifdef CONFIG_PCI_IOV
@@ -558,14 +549,12 @@
pdn->pe_number = IODA_INVALID_PE;
#endif
if (rmv_data)
- list_add(&edev->rmv_list, &rmv_data->edev_list);
+ list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
} else {
pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(dev);
pci_unlock_rescan_remove();
}
-
- return NULL;
}
static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
@@ -590,34 +579,22 @@
* PE reset (for 3 times), we try to clear the frozen state
* for 3 times as well.
*/
-static void *__eeh_clear_pe_frozen_state(struct eeh_pe *pe, void *flag)
+static int eeh_clear_pe_frozen_state(struct eeh_pe *root, bool include_passed)
{
- bool clear_sw_state = *(bool *)flag;
- int i, rc = 1;
+ struct eeh_pe *pe;
+ int i;
- for (i = 0; rc && i < 3; i++)
- rc = eeh_unfreeze_pe(pe, clear_sw_state);
-
- /* Stop immediately on any errors */
- if (rc) {
- pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n",
- __func__, rc, pe->phb->global_number, pe->addr);
- return (void *)pe;
+ eeh_for_each_pe(root, pe) {
+ if (include_passed || !eeh_pe_passed(pe)) {
+ for (i = 0; i < 3; i++)
+ if (!eeh_unfreeze_pe(pe))
+ break;
+ if (i >= 3)
+ return -EIO;
+ }
}
-
- return NULL;
-}
-
-static int eeh_clear_pe_frozen_state(struct eeh_pe *pe,
- bool clear_sw_state)
-{
- void *rc;
-
- rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state);
- if (!rc)
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
-
- return rc ? -EIO : 0;
+ eeh_pe_state_clear(root, EEH_PE_ISOLATED, include_passed);
+ return 0;
}
int eeh_pe_reset_and_recover(struct eeh_pe *pe)
@@ -635,16 +612,16 @@
eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
/* Issue reset */
- ret = eeh_pe_reset_full(pe);
+ ret = eeh_pe_reset_full(pe, true);
if (ret) {
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
return ret;
}
/* Unfreeze the PE */
ret = eeh_clear_pe_frozen_state(pe, true);
if (ret) {
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
return ret;
}
@@ -652,7 +629,7 @@
eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
/* Clear recovery mode */
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
return 0;
}
@@ -675,6 +652,11 @@
time64_t tstamp;
int cnt, rc;
struct eeh_dev *edev;
+ struct eeh_pe *tmp_pe;
+ bool any_passed = false;
+
+ eeh_for_each_pe(pe, tmp_pe)
+ any_passed |= eeh_pe_passed(tmp_pe);
/* pcibios will clear the counter; save the value */
cnt = pe->freeze_count;
@@ -687,7 +669,7 @@
* into pci_hp_add_devices().
*/
eeh_pe_state_mark(pe, EEH_PE_KEEP);
- if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
+ if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) {
eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
} else {
pci_lock_rescan_remove();
@@ -704,7 +686,7 @@
* config accesses. So we prefer to block them. However, controlled
* PCI config accesses initiated from EEH itself are allowed.
*/
- rc = eeh_pe_reset_full(pe);
+ rc = eeh_pe_reset_full(pe, false);
if (rc)
return rc;
@@ -727,7 +709,7 @@
* the device up before the scripts have taken it down,
* potentially weird things happen.
*/
- if (!driver_eeh_aware || rmv_data->removed) {
+ if (!driver_eeh_aware || rmv_data->removed_dev_count) {
pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
(driver_eeh_aware ? "partial" : "complete"));
ssleep(5);
@@ -737,17 +719,17 @@
* PE. We should disconnect it so the binding can be
* rebuilt when adding PCI devices.
*/
- edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
+ edev = list_first_entry(&pe->edevs, struct eeh_dev, entry);
eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
if (pe->type & EEH_PE_VF) {
- eeh_add_virt_device(edev, NULL);
+ eeh_add_virt_device(edev);
} else {
if (!driver_eeh_aware)
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
pci_hp_add_devices(bus);
}
}
- eeh_pe_state_clear(pe, EEH_PE_KEEP);
+ eeh_pe_state_clear(pe, EEH_PE_KEEP, true);
pe->tstamp = tstamp;
pe->freeze_count = cnt;
@@ -761,6 +743,99 @@
*/
#define MAX_WAIT_FOR_RECOVERY 300
+
+/* Walks the PE tree after processing an event to remove any stale PEs.
+ *
+ * NB: This needs to be recursive to ensure the leaf PEs get removed
+ * before their parents do. Although this is possible to do recursively
+ * we don't since this is easier to read and we need to garantee
+ * the leaf nodes will be handled first.
+ */
+static void eeh_pe_cleanup(struct eeh_pe *pe)
+{
+ struct eeh_pe *child_pe, *tmp;
+
+ list_for_each_entry_safe(child_pe, tmp, &pe->child_list, child)
+ eeh_pe_cleanup(child_pe);
+
+ if (pe->state & EEH_PE_KEEP)
+ return;
+
+ if (!(pe->state & EEH_PE_INVALID))
+ return;
+
+ if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) {
+ list_del(&pe->child);
+ kfree(pe);
+ }
+}
+
+/**
+ * eeh_check_slot_presence - Check if a device is still present in a slot
+ * @pdev: pci_dev to check
+ *
+ * This function may return a false positive if we can't determine the slot's
+ * presence state. This might happen for for PCIe slots if the PE containing
+ * the upstream bridge is also frozen, or the bridge is part of the same PE
+ * as the device.
+ *
+ * This shouldn't happen often, but you might see it if you hotplug a PCIe
+ * switch.
+ */
+static bool eeh_slot_presence_check(struct pci_dev *pdev)
+{
+ const struct hotplug_slot_ops *ops;
+ struct pci_slot *slot;
+ u8 state;
+ int rc;
+
+ if (!pdev)
+ return false;
+
+ if (pdev->error_state == pci_channel_io_perm_failure)
+ return false;
+
+ slot = pdev->slot;
+ if (!slot || !slot->hotplug)
+ return true;
+
+ ops = slot->hotplug->ops;
+ if (!ops || !ops->get_adapter_status)
+ return true;
+
+ /* set the attention indicator while we've got the slot ops */
+ if (ops->set_attention_status)
+ ops->set_attention_status(slot->hotplug, 1);
+
+ rc = ops->get_adapter_status(slot->hotplug, &state);
+ if (rc)
+ return true;
+
+ return !!state;
+}
+
+static void eeh_clear_slot_attention(struct pci_dev *pdev)
+{
+ const struct hotplug_slot_ops *ops;
+ struct pci_slot *slot;
+
+ if (!pdev)
+ return;
+
+ if (pdev->error_state == pci_channel_io_perm_failure)
+ return;
+
+ slot = pdev->slot;
+ if (!slot || !slot->hotplug)
+ return;
+
+ ops = slot->hotplug->ops;
+ if (!ops || !ops->set_attention_status)
+ return;
+
+ ops->set_attention_status(slot->hotplug, 0);
+}
+
/**
* eeh_handle_normal_event - Handle EEH events on a specific PE
* @pe: EEH PE - which should not be used after we return, as it may
@@ -789,7 +864,9 @@
struct eeh_pe *tmp_pe;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
- struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
+ struct eeh_rmv_data rmv_data =
+ {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
+ int devices = 0;
bus = eeh_pe_bus_get(pe);
if (!bus) {
@@ -798,7 +875,59 @@
return;
}
- eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ /*
+ * When devices are hot-removed we might get an EEH due to
+ * a driver attempting to touch the MMIO space of a removed
+ * device. In this case we don't have a device to recover
+ * so suppress the event if we can't find any present devices.
+ *
+ * The hotplug driver should take care of tearing down the
+ * device itself.
+ */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ if (eeh_slot_presence_check(edev->pdev))
+ devices++;
+
+ if (!devices) {
+ pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
+ pe->phb->global_number, pe->addr);
+ goto out; /* nothing to recover */
+ }
+
+ /* Log the event */
+ if (pe->type & EEH_PE_PHB) {
+ pr_err("EEH: PHB#%x failure detected, location: %s\n",
+ pe->phb->global_number, eeh_pe_loc_get(pe));
+ } else {
+ struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
+
+ pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+ pe->phb->global_number, pe->addr);
+ pr_err("EEH: PE location: %s, PHB location: %s\n",
+ eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
+ }
+
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Print the saved stack trace now that we've verified there's
+ * something to recover.
+ */
+ if (pe->trace_entries) {
+ void **ptrs = (void **) pe->stack_trace;
+ int i;
+
+ pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+ pe->phb->global_number, pe->addr);
+
+ /* FIXME: Use the same format as dump_stack() */
+ pr_err("EEH: Call Trace:\n");
+ for (i = 0; i < pe->trace_entries; i++)
+ pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);
+
+ pe->trace_entries = 0;
+ }
+#endif /* CONFIG_STACKTRACE */
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
@@ -806,10 +935,12 @@
pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
pe->phb->global_number, pe->addr,
pe->freeze_count);
- goto hard_fail;
+ result = PCI_ERS_RESULT_DISCONNECT;
}
- pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
- pe->freeze_count, eeh_max_freezes);
+
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
@@ -821,31 +952,39 @@
* the error. Override the result if necessary to have partially
* hotplug for this case.
*/
- pr_info("EEH: Notify device drivers to shutdown\n");
- eeh_set_channel_state(pe, pci_channel_io_frozen);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error,
- &result);
- if ((pe->type & EEH_PE_PHB) &&
- result != PCI_ERS_RESULT_NONE &&
- result != PCI_ERS_RESULT_NEED_RESET)
- result = PCI_ERS_RESULT_NEED_RESET;
+ if (result != PCI_ERS_RESULT_DISCONNECT) {
+ pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+ pe->freeze_count, eeh_max_freezes);
+ pr_info("EEH: Notify device drivers to shutdown\n");
+ eeh_set_channel_state(pe, pci_channel_io_frozen);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(IO frozen)", pe,
+ eeh_report_error, &result);
+ if ((pe->type & EEH_PE_PHB) &&
+ result != PCI_ERS_RESULT_NONE &&
+ result != PCI_ERS_RESULT_NEED_RESET)
+ result = PCI_ERS_RESULT_NEED_RESET;
+ }
/* Get the current PCI slot state. This can take a long time,
* sometimes over 300 seconds for certain systems.
*/
- rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
- if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
- pr_warn("EEH: Permanent failure\n");
- goto hard_fail;
+ if (result != PCI_ERS_RESULT_DISCONNECT) {
+ rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
+ if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
+ pr_warn("EEH: Permanent failure\n");
+ result = PCI_ERS_RESULT_DISCONNECT;
+ }
}
/* Since rtas may enable MMIO when posting the error log,
* don't post the error log until after all dev drivers
* have been informed.
*/
- pr_info("EEH: Collect temporary log\n");
- eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+ if (result != PCI_ERS_RESULT_DISCONNECT) {
+ pr_info("EEH: Collect temporary log\n");
+ eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+ }
/* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they
@@ -857,7 +996,7 @@
if (rc) {
pr_warn("%s: Unable to reset, err=%d\n",
__func__, rc);
- goto hard_fail;
+ result = PCI_ERS_RESULT_DISCONNECT;
}
}
@@ -866,9 +1005,9 @@
pr_info("EEH: Enable I/O for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
- if (rc < 0)
- goto hard_fail;
- if (rc) {
+ if (rc < 0) {
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
pr_info("EEH: Notify device drivers to resume I/O\n");
@@ -882,9 +1021,9 @@
pr_info("EEH: Enabled DMA for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
- if (rc < 0)
- goto hard_fail;
- if (rc) {
+ if (rc < 0) {
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
/*
@@ -892,17 +1031,11 @@
* is still in frozen state. Clear it before
* resuming the PE.
*/
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
result = PCI_ERS_RESULT_RECOVERED;
}
}
- /* If any device has a hard failure, then shut off everything. */
- if (result == PCI_ERS_RESULT_DISCONNECT) {
- pr_warn("EEH: Device driver gave up\n");
- goto hard_fail;
- }
-
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
pr_info("EEH: Reset without hotplug activity\n");
@@ -910,89 +1043,95 @@
if (rc) {
pr_warn("%s: Cannot reset, err=%d\n",
__func__, rc);
- goto hard_fail;
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else {
+ result = PCI_ERS_RESULT_NONE;
+ eeh_set_channel_state(pe, pci_channel_io_normal);
+ eeh_set_irq_state(pe, true);
+ eeh_pe_report("slot_reset", pe, eeh_report_reset,
+ &result);
+ }
+ }
+
+ if ((result == PCI_ERS_RESULT_RECOVERED) ||
+ (result == PCI_ERS_RESULT_NONE)) {
+ /*
+ * For those hot removed VFs, we should add back them after PF
+ * get recovered properly.
+ */
+ list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
+ rmv_entry) {
+ eeh_add_virt_device(edev);
+ list_del(&edev->rmv_entry);
}
- pr_info("EEH: Notify device drivers "
- "the completion of reset\n");
- result = PCI_ERS_RESULT_NONE;
+ /* Tell all device drivers that they can resume operations */
+ pr_info("EEH: Notify device driver to resume\n");
eeh_set_channel_state(pe, pci_channel_io_normal);
eeh_set_irq_state(pe, true);
- eeh_pe_report("slot_reset", pe, eeh_report_reset, &result);
- }
+ eeh_pe_report("resume", pe, eeh_report_resume, NULL);
+ eeh_for_each_pe(pe, tmp_pe) {
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+ edev->in_error = false;
+ }
+ }
- /* All devices should claim they have recovered by now. */
- if ((result != PCI_ERS_RESULT_RECOVERED) &&
- (result != PCI_ERS_RESULT_NONE)) {
- pr_warn("EEH: Not recovered\n");
- goto hard_fail;
- }
+ pr_info("EEH: Recovery successful.\n");
+ } else {
+ /*
+ * About 90% of all real-life EEH failures in the field
+ * are due to poorly seated PCI cards. Only 10% or so are
+ * due to actual, failed cards.
+ */
+ pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
+ "Please try reseating or replacing it\n",
+ pe->phb->global_number, pe->addr);
- /*
- * For those hot removed VFs, we should add back them after PF get
- * recovered properly.
- */
- list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
- eeh_add_virt_device(edev, NULL);
- list_del(&edev->rmv_list);
- }
+ eeh_slot_error_detail(pe, EEH_LOG_PERM);
- /* Tell all device drivers that they can resume operations */
- pr_info("EEH: Notify device driver to resume\n");
- eeh_set_channel_state(pe, pci_channel_io_normal);
- eeh_set_irq_state(pe, true);
- eeh_pe_report("resume", pe, eeh_report_resume, NULL);
- eeh_for_each_pe(pe, tmp_pe) {
- eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
- edev->mode &= ~EEH_DEV_NO_HANDLER;
- edev->in_error = false;
+ /* Notify all devices that they're about to go down. */
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(permanent failure)", pe,
+ eeh_report_failure, NULL);
+
+ /* Mark the PE to be removed permanently */
+ eeh_pe_state_mark(pe, EEH_PE_REMOVED);
+
+ /*
+ * Shut down the device drivers for good. We mark
+ * all removed devices correctly to avoid access
+ * the their PCI config any more.
+ */
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
+ /* The passed PE should no longer be used */
+ return;
}
}
- pr_info("EEH: Recovery successful.\n");
- goto final;
-
-hard_fail:
+out:
/*
- * About 90% of all real-life EEH failures in the field
- * are due to poorly seated PCI cards. Only 10% or so are
- * due to actual, failed cards.
+ * Clean up any PEs without devices. While marked as EEH_PE_RECOVERYING
+ * we don't want to modify the PE tree structure so we do it here.
*/
- pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
- "Please try reseating or replacing it\n",
- pe->phb->global_number, pe->addr);
+ eeh_pe_cleanup(pe);
- eeh_slot_error_detail(pe, EEH_LOG_PERM);
+ /* clear the slot attention LED for all recovered devices */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ eeh_clear_slot_attention(edev->pdev);
- /* Notify all devices that they're about to go down. */
- eeh_set_channel_state(pe, pci_channel_io_perm_failure);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(permanent failure)", pe,
- eeh_report_failure, NULL);
-
- /* Mark the PE to be removed permanently */
- eeh_pe_state_mark(pe, EEH_PE_REMOVED);
-
- /*
- * Shut down the device drivers for good. We mark
- * all removed devices correctly to avoid access
- * the their PCI config any more.
- */
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- } else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-
- pci_lock_rescan_remove();
- pci_hp_remove_devices(bus);
- pci_unlock_rescan_remove();
- /* The passed PE should no longer be used */
- return;
- }
-final:
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
}
/**
@@ -1004,7 +1143,8 @@
*/
void eeh_handle_special_event(void)
{
- struct eeh_pe *pe, *phb_pe;
+ struct eeh_pe *pe, *phb_pe, *tmp_pe;
+ struct eeh_dev *edev, *tmp_edev;
struct pci_bus *bus;
struct pci_controller *hose;
unsigned long flags;
@@ -1026,7 +1166,7 @@
phb_pe = eeh_phb_pe_get(hose);
if (!phb_pe) continue;
- eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(phb_pe);
}
eeh_serialize_unlock(flags);
@@ -1041,11 +1181,9 @@
/* Purge all events of the PHB */
eeh_remove_event(pe, true);
- if (rc == EEH_NEXT_ERR_DEAD_PHB)
- eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
- else
- eeh_pe_state_mark(pe,
- EEH_PE_ISOLATED | EEH_PE_RECOVERING);
+ if (rc != EEH_NEXT_ERR_DEAD_PHB)
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ eeh_pe_mark_isolated(pe);
eeh_serialize_unlock(flags);
@@ -1065,6 +1203,7 @@
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
eeh_handle_normal_event(pe);
} else {
pci_lock_rescan_remove();
@@ -1075,8 +1214,12 @@
(phb_pe->state & EEH_PE_RECOVERING))
continue;
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
/* Notify all devices to be down */
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_set_channel_state(pe, pci_channel_io_perm_failure);
eeh_pe_report(
"error_detected(permanent failure)", pe,
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 61c9356..a7a8dc1 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -1,17 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Copyright (c) 2005 Linas Vepstas <linas@linas.org>
*/
@@ -35,7 +23,7 @@
*/
static DEFINE_SPINLOCK(eeh_eventlist_lock);
-static struct semaphore eeh_eventlist_sem;
+static DECLARE_COMPLETION(eeh_eventlist_event);
static LIST_HEAD(eeh_eventlist);
/**
@@ -52,10 +40,9 @@
{
unsigned long flags;
struct eeh_event *event;
- struct eeh_pe *pe;
while (!kthread_should_stop()) {
- if (down_interruptible(&eeh_eventlist_sem))
+ if (wait_for_completion_interruptible(&eeh_eventlist_event))
break;
/* Fetch EEH event from the queue */
@@ -71,19 +58,10 @@
continue;
/* We might have event without binding PE */
- pe = event->pe;
- if (pe) {
- if (pe->type & EEH_PE_PHB)
- pr_info("EEH: Detected error on PHB#%x\n",
- pe->phb->global_number);
- else
- pr_info("EEH: Detected PCI bus error on "
- "PHB#%x-PE#%x\n",
- pe->phb->global_number, pe->addr);
- eeh_handle_normal_event(pe);
- } else {
+ if (event->pe)
+ eeh_handle_normal_event(event->pe);
+ else
eeh_handle_special_event();
- }
kfree(event);
}
@@ -102,9 +80,6 @@
struct task_struct *t;
int ret = 0;
- /* Initialize semaphore */
- sema_init(&eeh_eventlist_sem, 0);
-
t = kthread_run(eeh_event_handler, NULL, "eehd");
if (IS_ERR(t)) {
ret = PTR_ERR(t);
@@ -124,7 +99,7 @@
* the actual event will be delivered in a normal context
* (from a workqueue).
*/
-int eeh_send_failure_event(struct eeh_pe *pe)
+int __eeh_send_failure_event(struct eeh_pe *pe)
{
unsigned long flags;
struct eeh_event *event;
@@ -136,17 +111,49 @@
}
event->pe = pe;
+ /*
+ * Mark the PE as recovering before inserting it in the queue.
+ * This prevents the PE from being free()ed by a hotplug driver
+ * while the PE is sitting in the event queue.
+ */
+ if (pe) {
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Save the current stack trace so we can dump it from the
+ * event handler thread.
+ */
+ pe->trace_entries = stack_trace_save(pe->stack_trace,
+ ARRAY_SIZE(pe->stack_trace), 0);
+#endif /* CONFIG_STACKTRACE */
+
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ }
+
/* We may or may not be called in an interrupt context */
spin_lock_irqsave(&eeh_eventlist_lock, flags);
list_add(&event->list, &eeh_eventlist);
spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
/* For EEH deamon to knick in */
- up(&eeh_eventlist_sem);
+ complete(&eeh_eventlist_event);
return 0;
}
+int eeh_send_failure_event(struct eeh_pe *pe)
+{
+ /*
+ * If we've manually supressed recovery events via debugfs
+ * then just drop it on the floor.
+ */
+ if (eeh_debugfs_no_recover) {
+ pr_err("EEH: Event dropped due to no_recover setting\n");
+ return 0;
+ }
+
+ return __eeh_send_failure_event(pe);
+}
+
/**
* eeh_remove_event - Remove EEH event from the queue
* @pe: Event binding to the PE
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 1b238ec..177852e 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The file intends to implement PE based on the information from
* platforms. Basically, there have 3 types of PEs: PHB/Bus/Device.
@@ -6,20 +7,6 @@
* PE is only meaningful in one PHB domain.
*
* Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/delay.h>
@@ -75,7 +62,6 @@
pe->type = type;
pe->phb = phb;
INIT_LIST_HEAD(&pe->child_list);
- INIT_LIST_HEAD(&pe->child);
INIT_LIST_HEAD(&pe->edevs);
pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe),
@@ -110,6 +96,57 @@
}
/**
+ * eeh_wait_state - Wait for PE state
+ * @pe: EEH PE
+ * @max_wait: maximal period in millisecond
+ *
+ * Wait for the state of associated PE. It might take some time
+ * to retrieve the PE's state.
+ */
+int eeh_wait_state(struct eeh_pe *pe, int max_wait)
+{
+ int ret;
+ int mwait;
+
+ /*
+ * According to PAPR, the state of PE might be temporarily
+ * unavailable. Under the circumstance, we have to wait
+ * for indicated time determined by firmware. The maximal
+ * wait time is 5 minutes, which is acquired from the original
+ * EEH implementation. Also, the original implementation
+ * also defined the minimal wait time as 1 second.
+ */
+#define EEH_STATE_MIN_WAIT_TIME (1000)
+#define EEH_STATE_MAX_WAIT_TIME (300 * 1000)
+
+ while (1) {
+ ret = eeh_ops->get_state(pe, &mwait);
+
+ if (ret != EEH_STATE_UNAVAILABLE)
+ return ret;
+
+ if (max_wait <= 0) {
+ pr_warn("%s: Timeout when getting PE's state (%d)\n",
+ __func__, max_wait);
+ return EEH_STATE_NOT_SUPPORT;
+ }
+
+ if (mwait < EEH_STATE_MIN_WAIT_TIME) {
+ pr_warn("%s: Firmware returned bad wait value %d\n",
+ __func__, mwait);
+ mwait = EEH_STATE_MIN_WAIT_TIME;
+ } else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
+ pr_warn("%s: Firmware returned too long wait value %d\n",
+ __func__, mwait);
+ mwait = EEH_STATE_MAX_WAIT_TIME;
+ }
+
+ msleep(min(mwait, max_wait));
+ max_wait -= mwait;
+ }
+}
+
+/**
* eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
* @phb: PCI controller
*
@@ -194,29 +231,22 @@
* The function is used to traverse the devices of the specified
* PE and its child PEs.
*/
-void *eeh_pe_dev_traverse(struct eeh_pe *root,
+void eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_edev_traverse_func fn, void *flag)
{
struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
- void *ret;
if (!root) {
pr_warn("%s: Invalid PE %p\n",
__func__, root);
- return NULL;
+ return;
}
/* Traverse root PE */
- eeh_for_each_pe(root, pe) {
- eeh_pe_for_each_dev(pe, edev, tmp) {
- ret = fn(edev, flag);
- if (ret)
- return ret;
- }
- }
-
- return NULL;
+ eeh_for_each_pe(root, pe)
+ eeh_pe_for_each_dev(pe, edev, tmp)
+ fn(edev, flag);
}
/**
@@ -342,8 +372,7 @@
/* Check if the PE number is valid */
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
- pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
- __func__, config_addr, pdn->phb->global_number);
+ eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n");
return -EINVAL;
}
@@ -354,42 +383,34 @@
* components.
*/
pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
- if (pe && !(pe->type & EEH_PE_INVALID)) {
- /* Mark the PE as type of PCI bus */
- pe->type = EEH_PE_BUS;
- edev->pe = pe;
+ if (pe) {
+ if (pe->type & EEH_PE_INVALID) {
+ list_add_tail(&edev->entry, &pe->edevs);
+ edev->pe = pe;
+ /*
+ * We're running to here because of PCI hotplug caused by
+ * EEH recovery. We need clear EEH_PE_INVALID until the top.
+ */
+ parent = pe;
+ while (parent) {
+ if (!(parent->type & EEH_PE_INVALID))
+ break;
+ parent->type &= ~EEH_PE_INVALID;
+ parent = parent->parent;
+ }
- /* Put the edev to PE */
- list_add_tail(&edev->list, &pe->edevs);
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr);
- return 0;
- } else if (pe && (pe->type & EEH_PE_INVALID)) {
- list_add_tail(&edev->list, &pe->edevs);
- edev->pe = pe;
- /*
- * We're running to here because of PCI hotplug caused by
- * EEH recovery. We need clear EEH_PE_INVALID until the top.
- */
- parent = pe;
- while (parent) {
- if (!(parent->type & EEH_PE_INVALID))
- break;
- parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
- parent = parent->parent;
+ eeh_edev_dbg(edev,
+ "Added to device PE (parent: PE#%x)\n",
+ pe->parent->addr);
+ } else {
+ /* Mark the PE as type of PCI bus */
+ pe->type = EEH_PE_BUS;
+ edev->pe = pe;
+
+ /* Put the edev to PE */
+ list_add_tail(&edev->entry, &pe->edevs);
+ eeh_edev_dbg(edev, "Added to bus PE\n");
}
-
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device "
- "PE#%x, Parent PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr, pe->parent->addr);
return 0;
}
@@ -429,15 +450,10 @@
* link the EEH device accordingly.
*/
list_add_tail(&pe->child, &parent->child_list);
- list_add_tail(&edev->list, &pe->edevs);
+ list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
- "Device PE#%x, Parent PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr, pe->parent->addr);
+ eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n",
+ pe->parent->addr);
return 0;
}
@@ -454,22 +470,18 @@
int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
+ bool keep, recover;
int cnt;
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
- if (!edev->pe) {
- pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
- __func__, pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn));
+ pe = eeh_dev_to_pe(edev);
+ if (!pe) {
+ eeh_edev_dbg(edev, "No PE found for device.\n");
return -EEXIST;
}
/* Remove the EEH device */
- pe = eeh_dev_to_pe(edev);
edev->pe = NULL;
- list_del(&edev->list);
+ list_del(&edev->entry);
/*
* Check if the parent PE includes any EEH devices.
@@ -479,10 +491,21 @@
*/
while (1) {
parent = pe->parent;
+
+ /* PHB PEs should never be removed */
if (pe->type & EEH_PE_PHB)
break;
- if (!(pe->state & EEH_PE_KEEP)) {
+ /*
+ * XXX: KEEP is set while resetting a PE. I don't think it's
+ * ever set without RECOVERING also being set. I could
+ * be wrong though so catch that with a WARN.
+ */
+ keep = !!(pe->state & EEH_PE_KEEP);
+ recover = !!(pe->state & EEH_PE_RECOVERING);
+ WARN_ON(keep && !recover);
+
+ if (!keep && !recover) {
if (list_empty(&pe->edevs) &&
list_empty(&pe->child_list)) {
list_del(&pe->child);
@@ -491,6 +514,15 @@
break;
}
} else {
+ /*
+ * Mark the PE as invalid. At the end of the recovery
+ * process any invalid PEs will be garbage collected.
+ *
+ * We need to delay the free()ing of them since we can
+ * remove edev's while traversing the PE tree which
+ * might trigger the removal of a PE and we can't
+ * deal with that (yet).
+ */
if (list_empty(&pe->edevs)) {
cnt = 0;
list_for_each_entry(child, &pe->child_list, child) {
@@ -541,44 +573,6 @@
}
/**
- * __eeh_pe_state_mark - Mark the state for the PE
- * @data: EEH PE
- * @flag: state
- *
- * The function is used to mark the indicated state for the given
- * PE. Also, the associated PCI devices will be put into IO frozen
- * state as well.
- */
-static void *__eeh_pe_state_mark(struct eeh_pe *pe, void *flag)
-{
- int state = *((int *)flag);
- struct eeh_dev *edev, *tmp;
- struct pci_dev *pdev;
-
- /* Keep the state of permanently removed PE intact */
- if (pe->state & EEH_PE_REMOVED)
- return NULL;
-
- pe->state |= state;
-
- /* Offline PCI devices if applicable */
- if (!(state & EEH_PE_ISOLATED))
- return NULL;
-
- eeh_pe_for_each_dev(pe, edev, tmp) {
- pdev = eeh_dev_to_pci_dev(edev);
- if (pdev)
- pdev->error_state = pci_channel_io_frozen;
- }
-
- /* Block PCI config access if required */
- if (pe->state & EEH_PE_CFG_RESTRICTED)
- pe->state |= EEH_PE_CFG_BLOCKED;
-
- return NULL;
-}
-
-/**
* eeh_pe_state_mark - Mark specified state for PE and its associated device
* @pe: EEH PE
*
@@ -586,19 +580,49 @@
* is used to mark appropriate state for the affected PEs and the
* associated devices.
*/
-void eeh_pe_state_mark(struct eeh_pe *pe, int state)
+void eeh_pe_state_mark(struct eeh_pe *root, int state)
{
- eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
+ struct eeh_pe *pe;
+
+ eeh_for_each_pe(root, pe)
+ if (!(pe->state & EEH_PE_REMOVED))
+ pe->state |= state;
}
EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
-static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
+/**
+ * eeh_pe_mark_isolated
+ * @pe: EEH PE
+ *
+ * Record that a PE has been isolated by marking the PE and it's children as
+ * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices
+ * as pci_channel_io_frozen.
+ */
+void eeh_pe_mark_isolated(struct eeh_pe *root)
+{
+ struct eeh_pe *pe;
+ struct eeh_dev *edev;
+ struct pci_dev *pdev;
+
+ eeh_pe_state_mark(root, EEH_PE_ISOLATED);
+ eeh_for_each_pe(root, pe) {
+ list_for_each_entry(edev, &pe->edevs, entry) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (pdev)
+ pdev->error_state = pci_channel_io_frozen;
+ }
+ /* Block PCI config access if required */
+ if (pe->state & EEH_PE_CFG_RESTRICTED)
+ pe->state |= EEH_PE_CFG_BLOCKED;
+ }
+}
+EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
+
+static void __eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
{
int mode = *((int *)flag);
edev->mode |= mode;
-
- return NULL;
}
/**
@@ -613,84 +637,52 @@
}
/**
- * __eeh_pe_state_clear - Clear state for the PE
+ * eeh_pe_state_clear - Clear state for the PE
* @data: EEH PE
- * @flag: state
+ * @state: state
+ * @include_passed: include passed-through devices?
*
* The function is used to clear the indicated state from the
* given PE. Besides, we also clear the check count of the PE
* as well.
*/
-static void *__eeh_pe_state_clear(struct eeh_pe *pe, void *flag)
+void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed)
{
- int state = *((int *)flag);
+ struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
struct pci_dev *pdev;
- /* Keep the state of permanently removed PE intact */
- if (pe->state & EEH_PE_REMOVED)
- return NULL;
-
- pe->state &= ~state;
-
- /*
- * Special treatment on clearing isolated state. Clear
- * check count since last isolation and put all affected
- * devices to normal state.
- */
- if (!(state & EEH_PE_ISOLATED))
- return NULL;
-
- pe->check_count = 0;
- eeh_pe_for_each_dev(pe, edev, tmp) {
- pdev = eeh_dev_to_pci_dev(edev);
- if (!pdev)
+ eeh_for_each_pe(root, pe) {
+ /* Keep the state of permanently removed PE intact */
+ if (pe->state & EEH_PE_REMOVED)
continue;
- pdev->error_state = pci_channel_io_normal;
+ if (!include_passed && eeh_pe_passed(pe))
+ continue;
+
+ pe->state &= ~state;
+
+ /*
+ * Special treatment on clearing isolated state. Clear
+ * check count since last isolation and put all affected
+ * devices to normal state.
+ */
+ if (!(state & EEH_PE_ISOLATED))
+ continue;
+
+ pe->check_count = 0;
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (!pdev)
+ continue;
+
+ pdev->error_state = pci_channel_io_normal;
+ }
+
+ /* Unblock PCI config access if required */
+ if (pe->state & EEH_PE_CFG_RESTRICTED)
+ pe->state &= ~EEH_PE_CFG_BLOCKED;
}
-
- /* Unblock PCI config access if required */
- if (pe->state & EEH_PE_CFG_RESTRICTED)
- pe->state &= ~EEH_PE_CFG_BLOCKED;
-
- return NULL;
-}
-
-/**
- * eeh_pe_state_clear - Clear state for the PE and its children
- * @pe: PE
- * @state: state to be cleared
- *
- * When the PE and its children has been recovered from error,
- * we need clear the error state for that. The function is used
- * for the purpose.
- */
-void eeh_pe_state_clear(struct eeh_pe *pe, int state)
-{
- eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
-}
-
-/**
- * eeh_pe_state_mark_with_cfg - Mark PE state with unblocked config space
- * @pe: PE
- * @state: PE state to be set
- *
- * Set specified flag to PE and its child PEs. The PCI config space
- * of some PEs is blocked automatically when EEH_PE_ISOLATED is set,
- * which isn't needed in some situations. The function allows to set
- * the specified flag to indicated PEs without blocking their PCI
- * config space.
- */
-void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state)
-{
- eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
- if (!(state & EEH_PE_ISOLATED))
- return;
-
- /* Clear EEH_PE_CFG_BLOCKED, which might be set just now */
- state = EEH_PE_CFG_BLOCKED;
- eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
}
/*
@@ -718,17 +710,13 @@
if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
return;
- pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
- __func__, pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn));
+ eeh_edev_dbg(edev, "Checking PCIe link...\n");
/* Check slot status */
cap = edev->pcie_cap;
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val);
if (!(val & PCI_EXP_SLTSTA_PDS)) {
- pr_debug(" No card in the slot (0x%04x) !\n", val);
+ eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
return;
}
@@ -737,7 +725,7 @@
if (val & PCI_EXP_SLTCAP_PCP) {
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val);
if (val & PCI_EXP_SLTCTL_PCC) {
- pr_debug(" In power-off state, power it on ...\n");
+ eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val);
@@ -753,7 +741,7 @@
/* Check link */
eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val);
if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
- pr_debug(" No link reporting capability (0x%08x) \n", val);
+ eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val);
msleep(1000);
return;
}
@@ -770,10 +758,10 @@
}
if (val & PCI_EXP_LNKSTA_DLLLA)
- pr_debug(" Link up (%s)\n",
+ eeh_edev_dbg(edev, "Link up (%s)\n",
(val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
else
- pr_debug(" Link not ready (0x%04x)\n", val);
+ eeh_edev_dbg(edev, "Link not ready (0x%04x)\n", val);
}
#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
@@ -853,7 +841,7 @@
* the expansion ROM base address, the latency timer, and etc.
* from the saved values in the device node.
*/
-static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
+static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
@@ -865,8 +853,6 @@
if (eeh_ops->restore_config && pdn)
eeh_ops->restore_config(pdn);
-
- return NULL;
}
/**
@@ -945,7 +931,7 @@
return pe->bus;
/* Retrieve the parent PCI bus of first (top) PCI device */
- edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list);
+ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
pdev = eeh_dev_to_pci_dev(edev);
if (pdev)
return pdev->bus;
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index deed906..3fa04dd 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -82,8 +82,9 @@
if (!(edev->pe->state & EEH_PE_ISOLATED))
return count;
- if (eeh_unfreeze_pe(edev->pe, true))
+ if (eeh_unfreeze_pe(edev->pe))
return -EIO;
+ eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED, true);
return count;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e58c3f4..d60908e 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -11,12 +12,6 @@
*
* This file contains the system call entry code, context switch
* code, and exception/interrupt return code for PowerPC.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/errno.h>
@@ -36,15 +31,10 @@
#include <asm/asm-405.h>
#include <asm/feature-fixups.h>
#include <asm/barrier.h>
+#include <asm/kup.h>
+#include <asm/bug.h>
-/*
- * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE.
- */
-#if MSR_KERNEL >= 0x10000
-#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l
-#else
-#define LOAD_MSR_KERNEL(r, x) li r,(x)
-#endif
+#include "head_32.h"
/*
* Align to 4k in order to ensure that all functions modyfing srr0/srr1
@@ -97,14 +87,11 @@
mfspr r0,SPRN_SRR1
stw r0,_SRR1(r11)
- /* set the stack limit to the current stack
- * and set the limit to protect the thread_info
- * struct
- */
+ /* set the stack limit to the current stack */
mfspr r8,SPRN_SPRG_THREAD
lwz r0,KSP_LIMIT(r8)
stw r0,SAVED_KSP_LIMIT(r11)
- rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
+ rlwinm r0,r1,0,0,(31 - THREAD_SHIFT)
stw r0,KSP_LIMIT(r8)
/* fall through */
#endif
@@ -121,14 +108,11 @@
mfspr r0,SPRN_SRR1
stw r0,crit_srr1@l(0)
- /* set the stack limit to the current stack
- * and set the limit to protect the thread_info
- * struct
- */
+ /* set the stack limit to the current stack */
mfspr r8,SPRN_SPRG_THREAD
lwz r0,KSP_LIMIT(r8)
stw r0,saved_ksp_limit@l(0)
- rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
+ rlwinm r0,r1,0,0,(31 - THREAD_SHIFT)
stw r0,KSP_LIMIT(r8)
/* fall through */
#endif
@@ -156,9 +140,8 @@
stw r12,_CTR(r11)
stw r2,_XER(r11)
mfspr r12,SPRN_SPRG_THREAD
- addi r2,r12,-THREAD
- tovirt(r2,r2) /* set r2 to current */
beq 2f /* if from user, fix up THREAD.regs */
+ addi r2, r12, -THREAD
addi r11,r1,STACK_FRAME_OVERHEAD
stw r11,PT_REGS(r12)
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
@@ -166,6 +149,12 @@
internal debug mode bit to do this. */
lwz r12,THREAD_DBCR0(r12)
andis. r12,r12,DBCR0_IDM@h
+#endif
+ ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_lock r11, r12
+#endif
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
beq+ 3f
/* From user and task is ptraced - load up global dbcr0 */
li r12,-1 /* clear all pending debug events */
@@ -174,8 +163,7 @@
tophys(r11,r11)
addi r11,r11,global_dbcr0@l
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_CPU(r9)
+ lwz r9,TASK_CPU(r2)
slwi r9,r9,3
add r11,r11,r9
#endif
@@ -185,59 +173,72 @@
addi r12,r12,-1
stw r12,4(r11)
#endif
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- CURRENT_THREAD_INFO(r9, r1)
- tophys(r9, r9)
- ACCOUNT_CPU_USER_ENTRY(r9, r11, r12)
-#endif
b 3f
2: /* if from kernel, check interrupted DOZE/NAP mode and
* check for stack overflow
*/
+ kuap_save_and_lock r11, r12, r9, r2, r0
+ addi r2, r12, -THREAD
lwz r9,KSP_LIMIT(r12)
cmplw r1,r9 /* if r1 <= ksp_limit */
ble- stack_ovf /* then the kernel stack overflowed */
5:
-#if defined(CONFIG_6xx) || defined(CONFIG_E500)
- CURRENT_THREAD_INFO(r9, r1)
- tophys(r9,r9) /* check local flags */
- lwz r12,TI_LOCAL_FLAGS(r9)
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
+ lwz r12,TI_LOCAL_FLAGS(r2)
mtcrf 0x01,r12
bt- 31-TLF_NAPPING,4f
bt- 31-TLF_SLEEPING,7f
-#endif /* CONFIG_6xx || CONFIG_E500 */
+#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
.globl transfer_to_handler_cont
transfer_to_handler_cont:
3:
mflr r9
+ tovirt(r2, r2) /* set r2 to current */
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * When tracing IRQ state (lockdep) we enable the MMU before we call
+ * the IRQ tracing functions as they might access vmalloc space or
+ * perform IOs for console output.
+ *
+ * To speed up the syscall path where interrupts stay on, let's check
+ * first if we are changing the MSR value at all.
+ */
+ tophys(r12, r1)
+ lwz r12,_MSR(r12)
+ andi. r12,r12,MSR_EE
+ bne 1f
+
+ /* MSR isn't changing, just transition directly */
+#endif
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r10
+ mtlr r9
+ SYNC
+ RFI /* jump to handler, enable MMU */
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to
+ * keep interrupts disabled at this point otherwise we might risk
+ * taking an interrupt before we tell lockdep they are enabled.
+ */
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
+ LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR1,r0
SYNC
RFI
-reenable_mmu: /* re-enable mmu so we can */
- mfmsr r10
- lwz r12,_MSR(r1)
- xor r10,r10,r12
- andi. r10,r10,MSR_EE /* Did EE change? */
- beq 1f
+reenable_mmu:
/*
- * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1.
- * If from user mode there is only one stack frame on the stack, and
- * accessing CALLER_ADDR1 will cause oops. So we need create a dummy
- * stack frame to make trace_hardirqs_off happy.
- *
- * This is handy because we also need to save a bunch of GPRs,
+ * We save a bunch of GPRs,
* r3 can be different from GPR3(r1) at this point, r9 and r11
* contains the old MSR and handler address respectively,
* r4 & r5 can contain page fault arguments that need to be passed
@@ -245,14 +246,19 @@
* they aren't useful past this point (aren't syscall arguments),
* the rest is restored from the exception frame.
*/
+
stwu r1,-32(r1)
stw r9,8(r1)
stw r11,12(r1)
stw r3,16(r1)
stw r4,20(r1)
stw r5,24(r1)
- bl trace_hardirqs_off
- lwz r5,24(r1)
+
+ /* If we are disabling interrupts (normal case), simply log it with
+ * lockdep
+ */
+1: bl trace_hardirqs_off
+2: lwz r5,24(r1)
lwz r4,20(r1)
lwz r3,16(r1)
lwz r11,12(r1)
@@ -262,27 +268,22 @@
lwz r6,GPR6(r1)
lwz r7,GPR7(r1)
lwz r8,GPR8(r1)
-1: mtctr r11
+ mtctr r11
mtlr r9
bctr /* jump to handler */
-#else /* CONFIG_TRACE_IRQFLAGS */
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r10
- mtlr r9
- SYNC
- RFI /* jump to handler, enable MMU */
#endif /* CONFIG_TRACE_IRQFLAGS */
-#if defined (CONFIG_6xx) || defined(CONFIG_E500)
+#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
4: rlwinm r12,r12,0,~_TLF_NAPPING
- stw r12,TI_LOCAL_FLAGS(r9)
+ stw r12,TI_LOCAL_FLAGS(r2)
b power_save_ppc32_restore
7: rlwinm r12,r12,0,~_TLF_SLEEPING
- stw r12,TI_LOCAL_FLAGS(r9)
+ stw r12,TI_LOCAL_FLAGS(r2)
lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
+ kuap_restore r11, r2, r3, r4, r5
b fast_exception_return
#endif
@@ -303,7 +304,7 @@
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
@@ -312,6 +313,33 @@
SYNC
RFI
+#ifdef CONFIG_TRACE_IRQFLAGS
+trace_syscall_entry_irq_off:
+ /*
+ * Syscall shouldn't happen while interrupts are disabled,
+ * so let's do a warning here.
+ */
+0: trap
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+ bl trace_hardirqs_on
+
+ /* Now enable for real */
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
+ mtmsr r10
+
+ REST_GPR(0, r1)
+ REST_4GPRS(3, r1)
+ REST_2GPRS(7, r1)
+ b DoSyscall
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+ .globl transfer_to_syscall
+transfer_to_syscall:
+#ifdef CONFIG_TRACE_IRQFLAGS
+ andi. r12,r9,MSR_EE
+ beq- trace_syscall_entry_irq_off
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
/*
* Handle a system call.
*/
@@ -323,36 +351,16 @@
stw r3,ORIG_GPR3(r1)
li r12,0
stw r12,RESULT(r1)
- lwz r11,_CCR(r1) /* Clear SO bit in CR */
- rlwinm r11,r11,0,4,2
- stw r11,_CCR(r1)
#ifdef CONFIG_TRACE_IRQFLAGS
- /* Return from syscalls can (and generally will) hard enable
- * interrupts. You aren't supposed to call a syscall with
- * interrupts disabled in the first place. However, to ensure
- * that we get it right vs. lockdep if it happens, we force
- * that hard enable here with appropriate tracing if we see
- * that we have been called with interrupts off
- */
+ /* Make sure interrupts are enabled */
mfmsr r11
andi. r12,r11,MSR_EE
- bne+ 1f
- /* We came in with interrupts disabled, we enable them now */
- bl trace_hardirqs_on
- mfmsr r11
- lwz r0,GPR0(r1)
- lwz r3,GPR3(r1)
- lwz r4,GPR4(r1)
- ori r11,r11,MSR_EE
- lwz r5,GPR5(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
- mtmsr r11
-1:
+ /* We came in with interrupts disabled, we WARN and mark them enabled
+ * for lockdep now */
+0: tweqi r12, 0
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
#endif /* CONFIG_TRACE_IRQFLAGS */
- CURRENT_THREAD_INFO(r10, r1)
- lwz r11,TI_FLAGS(r10)
+ lwz r11,TI_FLAGS(r2)
andi. r11,r11,_TIF_SYSCALL_DOTRACE
bne- syscall_dotrace
syscall_dotrace_cont:
@@ -385,13 +393,12 @@
lwz r3,GPR3(r1)
#endif
mr r6,r3
- CURRENT_THREAD_INFO(r12, r1)
/* disable interrupts so current_thread_info()->flags can't change */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
- lwz r9,TI_FLAGS(r12)
+ lwz r9,TI_FLAGS(r2)
li r8,-MAX_ERRNO
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne- syscall_exit_work
@@ -405,8 +412,7 @@
lwz r8,_MSR(r1)
#ifdef CONFIG_TRACE_IRQFLAGS
/* If we are going to return from the syscall with interrupts
- * off, we trace that here. It shouldn't happen though but we
- * want to catch the bugger if it does right ?
+ * off, we trace that here. It shouldn't normally happen.
*/
andi. r10,r8,MSR_EE
bne+ 1f
@@ -435,13 +441,11 @@
lwarx r7,0,r1
END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
stwcx. r0,0,r1 /* to clear the reservation */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- andi. r4,r8,MSR_PR
- beq 3f
- CURRENT_THREAD_INFO(r4, r1)
- ACCOUNT_CPU_USER_EXIT(r4, r5, r7)
-3:
+ ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_unlock r5, r7
#endif
+ kuap_check r2, r4
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
mtlr r4
@@ -532,7 +536,7 @@
/* Clear per-syscall TIF flags if any are set. */
li r11,_TIF_PERSYSCALL_MASK
- addi r12,r12,TI_FLAGS
+ addi r12,r2,TI_FLAGS
3: lwarx r8,0,r12
andc r8,r8,r11
#ifdef CONFIG_IBM405_ERR77
@@ -540,7 +544,6 @@
#endif
stwcx. r8,0,r12
bne- 3b
- subi r12,r12,TI_FLAGS
4: /* Anything which requires enabling interrupts? */
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
@@ -594,6 +597,14 @@
stw r0,_TRAP(r1) /* register set saved */
b sys_clone
+ .globl ppc_clone3
+ppc_clone3:
+ SAVE_NVGPRS(r1)
+ lwz r0,_TRAP(r1)
+ rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
+ stw r0,_TRAP(r1) /* register set saved */
+ b sys_clone3
+
.globl ppc_swapcontext
ppc_swapcontext:
SAVE_NVGPRS(r1)
@@ -612,7 +623,7 @@
handle_page_fault:
stw r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_6xx
+#ifdef CONFIG_PPC_BOOK3S_32
andis. r0,r5,DSISR_DABRMATCH@h
bne- handle_dabr_fault
#endif
@@ -629,7 +640,7 @@
bl bad_page_fault
b ret_from_except_full
-#ifdef CONFIG_6xx
+#ifdef CONFIG_PPC_BOOK3S_32
/* We have a data breakpoint exception - handle it */
handle_dabr_fault:
SAVE_NVGPRS(r1)
@@ -693,6 +704,7 @@
stw r10,_CCR(r1)
stw r1,KSP(r3) /* Set old stack pointer */
+ kuap_check r2, r4
#ifdef CONFIG_SMP
/* We need a sync somewhere here to make sure that if the
* previous task gets rescheduled on another CPU, it sees all
@@ -745,6 +757,9 @@
mtcr r10
lwz r10,_LINK(r11)
mtlr r10
+ /* Clear the exception_marker on the stack to avoid confusing stacktrace */
+ li r10, 0
+ stw r10, 8(r11)
REST_GPR(10, r11)
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
@@ -762,11 +777,19 @@
1: lis r3,exc_exit_restart_end@ha
addi r3,r3,exc_exit_restart_end@l
cmplw r12,r3
+#if CONFIG_PPC_BOOK3S_601
+ bge 2b
+#else
bge 3f
+#endif
lis r4,exc_exit_restart@ha
addi r4,r4,exc_exit_restart@l
cmplw r12,r4
+#if CONFIG_PPC_BOOK3S_601
+ blt 2b
+#else
blt 3f
+#endif
lis r3,fee_restarts@ha
tophys(r3,r3)
lwz r5,fee_restarts@l(r3)
@@ -785,16 +808,13 @@
/* aargh, we don't know which trap this is */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
3:
-BEGIN_FTR_SECTION
- b 2b
-END_FTR_SECTION_IFSET(CPU_FTR_601)
li r10,-1
stw r10,_TRAP(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
lis r10,MSR_KERNEL@h
ori r10,r10,MSR_KERNEL@l
bl transfer_to_handler_full
- .long nonrecoverable_exception
+ .long unrecoverable_exception
.long ret_from_except
#endif
@@ -809,7 +829,7 @@
* can't change between when we test it and when we return
* from the interrupt. */
/* Note: We don't bother telling lockdep about it */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC /* Some chip revs have problems here... */
MTMSRD(r10) /* disable interrupts */
@@ -819,8 +839,7 @@
user_exc_return: /* r10 contains MSR_KERNEL here */
/* Check current_thread_info()->flags */
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_FLAGS(r9)
+ lwz r9,TI_FLAGS(r2)
andi. r0,r9,_TIF_USER_WORK_MASK
bne do_work
@@ -832,9 +851,9 @@
andis. r10,r0,DBCR0_IDM@h
bnel- load_dbcr0
#endif
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- CURRENT_THREAD_INFO(r9, r1)
- ACCOUNT_CPU_USER_EXIT(r9, r10, r11)
+ ACCOUNT_CPU_USER_EXIT(r2, r10, r11)
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_unlock r10, r11
#endif
b restore
@@ -842,8 +861,7 @@
/* N.B. the only way to get here is from the beq following ret_from_except. */
resume_kernel:
/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
- CURRENT_THREAD_INFO(r9, r1)
- lwz r8,TI_FLAGS(r9)
+ lwz r8,TI_FLAGS(r2)
andis. r0,r8,_TIF_EMULATE_STACK_STORE@h
beq+ 1f
@@ -869,7 +887,7 @@
/* Clear _TIF_EMULATE_STACK_STORE flag */
lis r11,_TIF_EMULATE_STACK_STORE@h
- addi r5,r9,TI_FLAGS
+ addi r5,r2,TI_FLAGS
0: lwarx r8,0,r5
andc r8,r8,r11
#ifdef CONFIG_IBM405_ERR77
@@ -881,14 +899,14 @@
#ifdef CONFIG_PREEMPT
/* check current_thread_info->preempt_count */
- lwz r0,TI_PREEMPT(r9)
+ lwz r0,TI_PREEMPT(r2)
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
- bne restore
+ bne restore_kuap
andi. r8,r8,_TIF_NEED_RESCHED
- beq+ restore
+ beq+ restore_kuap
lwz r3,_MSR(r1)
andi. r0,r3,MSR_EE /* interrupts off? */
- beq restore /* don't schedule if so */
+ beq restore_kuap /* don't schedule if so */
#ifdef CONFIG_TRACE_IRQFLAGS
/* Lockdep thinks irqs are enabled, we need to call
* preempt_schedule_irq with IRQs off, so we inform lockdep
@@ -896,11 +914,7 @@
*/
bl trace_hardirqs_off
#endif
-1: bl preempt_schedule_irq
- CURRENT_THREAD_INFO(r9, r1)
- lwz r3,TI_FLAGS(r9)
- andi. r0,r3,_TIF_NEED_RESCHED
- bne- 1b
+ bl preempt_schedule_irq
#ifdef CONFIG_TRACE_IRQFLAGS
/* And now, to properly rebalance the above, we tell lockdep they
* are being turned back on, which will happen when we return
@@ -908,6 +922,8 @@
bl trace_hardirqs_on
#endif
#endif /* CONFIG_PREEMPT */
+restore_kuap:
+ kuap_restore r1, r2, r9, r10, r0
/* interrupts are hard-disabled at this point */
restore:
@@ -931,28 +947,14 @@
* off in this assembly code while peeking at TI_FLAGS() and such. However
* we need to inform it if the exception turned interrupts off, and we
* are about to trun them back on.
- *
- * The problem here sadly is that we don't know whether the exceptions was
- * one that turned interrupts off or not. So we always tell lockdep about
- * turning them on here when we go back to wherever we came from with EE
- * on, even if that may meen some redudant calls being tracked. Maybe later
- * we could encode what the exception did somewhere or test the exception
- * type in the pt_regs but that sounds overkill
*/
andi. r10,r9,MSR_EE
beq 1f
- /*
- * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
- * which is the stack frame here, we need to force a stack frame
- * in case we came from user space.
- */
stwu r1,-32(r1)
mflr r0
stw r0,4(r1)
- stwu r1,-32(r1)
bl trace_hardirqs_on
- lwz r1,0(r1)
- lwz r1,0(r1)
+ addi r1, r1, 32
lwz r9,_MSR(r1)
1:
#endif /* CONFIG_TRACE_IRQFLAGS */
@@ -982,6 +984,9 @@
mtcrf 0xFF,r10
mtlr r11
+ /* Clear the exception_marker on the stack to avoid confusing stacktrace */
+ li r10, 0
+ stw r10, 8(r1)
/*
* Once we put values in SRR0 and SRR1, we are in a state
* where exceptions are not recoverable, since taking an
@@ -991,15 +996,12 @@
* can restart the exception exit path at the label
* exc_exit_restart below. -- paulus
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
SYNC
MTMSRD(r10) /* clear the RI bit */
.globl exc_exit_restart
exc_exit_restart:
lwz r12,_NIP(r1)
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
-#endif
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r9
REST_4GPRS(9, r1)
@@ -1021,6 +1023,9 @@
mtlr r11
lwz r10,_CCR(r1)
mtcrf 0xff,r10
+ /* Clear the exception_marker on the stack to avoid confusing stacktrace */
+ li r10, 0
+ stw r10, 8(r1)
REST_2GPRS(9, r1)
.globl exc_exit_restart
exc_exit_restart:
@@ -1066,7 +1071,7 @@
REST_NVGPRS(r1); \
lwz r3,_MSR(r1); \
andi. r3,r3,MSR_PR; \
- LOAD_MSR_KERNEL(r10,MSR_KERNEL); \
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \
bne user_exc_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
@@ -1166,10 +1171,6 @@
mfspr r9,SPRN_SPRG_THREAD
lwz r10,SAVED_KSP_LIMIT(r1)
stw r10,KSP_LIMIT(r9)
- lwz r9,THREAD_INFO-THREAD(r9)
- CURRENT_THREAD_INFO(r10, r1)
- lwz r10,TI_PREEMPT(r10)
- stw r10,TI_PREEMPT(r9)
RESTORE_xSRR(SRR0,SRR1);
RESTORE_xSRR(CSRR0,CSRR1);
RESTORE_MMU_REGS;
@@ -1201,8 +1202,7 @@
lis r11,global_dbcr0@ha
addi r11,r11,global_dbcr0@l
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_CPU(r9)
+ lwz r9,TASK_CPU(r2)
slwi r9,r9,3
add r11,r11,r9
#endif
@@ -1217,6 +1217,7 @@
.section .bss
.align 4
+ .global global_dbcr0
global_dbcr0:
.space 8*NR_CPUS
.previous
@@ -1227,9 +1228,10 @@
beq do_user_signal
do_resched: /* r10 contains MSR_KERNEL here */
- /* Note: We don't need to inform lockdep that we are enabling
- * interrupts here. As far as it knows, they are already enabled
- */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl trace_hardirqs_on
+ mfmsr r10
+#endif
ori r10,r10,MSR_EE
SYNC
MTMSRD(r10) /* hard-enable interrupts */
@@ -1239,11 +1241,10 @@
* neither. Those disable/enable cycles used to peek at
* TI_FLAGS aren't advertised.
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_FLAGS(r9)
+ lwz r9,TI_FLAGS(r2)
andi. r0,r9,_TIF_NEED_RESCHED
bne- do_resched
andi. r0,r9,_TIF_USER_WORK_MASK
@@ -1274,11 +1275,19 @@
lis r10,exc_exit_restart_end@ha
addi r10,r10,exc_exit_restart_end@l
cmplw r12,r10
+#ifdef CONFIG_PPC_BOOK3S_601
+ bgelr
+#else
bge 3f
+#endif
lis r11,exc_exit_restart@ha
addi r11,r11,exc_exit_restart@l
cmplw r12,r11
+#ifdef CONFIG_PPC_BOOK3S_601
+ bltlr
+#else
blt 3f
+#endif
lis r10,ee_restarts@ha
lwz r12,ee_restarts@l(r10)
addi r12,r12,1
@@ -1287,17 +1296,17 @@
blr
3: /* OK, we can't recover, kill this process */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_601)
lwz r3,_TRAP(r1)
andi. r0,r3,1
- beq 4f
+ beq 5f
SAVE_NVGPRS(r1)
rlwinm r3,r3,0,0,30
stw r3,_TRAP(r1)
+5: mfspr r2,SPRN_SPRG_THREAD
+ addi r2,r2,-THREAD
+ tovirt(r2,r2) /* set back r2 to current */
4: addi r3,r1,STACK_FRAME_OVERHEAD
- bl nonrecoverable_exception
+ bl unrecoverable_exception
/* shouldn't return */
b 4b
@@ -1330,12 +1339,12 @@
lwz r4,RTASBASE(r4)
mfmsr r9
stw r9,8(r1)
- LOAD_MSR_KERNEL(r0,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
SYNC /* disable interrupts so SRR0/1 */
MTMSRD(r0) /* don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
mtlr r6
- mtspr SPRN_SPRG_RTAS,r7
+ stw r7, THREAD + RTAS_SP(r2)
mtspr SPRN_SRR0,r8
mtspr SPRN_SRR1,r9
RFI
@@ -1344,7 +1353,8 @@
lwz r9,8(r9) /* original msr value */
addi r1,r1,INT_FRAME_SIZE
li r0,0
- mtspr SPRN_SPRG_RTAS,r0
+ tophys(r7, r2)
+ stw r0, THREAD + RTAS_SP(r7)
mtspr SPRN_SRR0,r8
mtspr SPRN_SRR1,r9
RFI /* return to caller */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2206912..3fd3ef3 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -11,11 +12,6 @@
*
* This file contains the system call entry code, context switch
* code, and exception/interrupt return code for PowerPC.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
@@ -46,6 +42,7 @@
#include <asm/exception-64e.h>
#endif
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
/*
* System calls.
@@ -54,6 +51,9 @@
SYS_CALL_TABLE:
.tc sys_call_table[TC],sys_call_table
+COMPAT_SYS_CALL_TABLE:
+ .tc compat_sys_call_table[TC],compat_sys_call_table
+
/* This value is used to mark exception frames on the stack. */
exception_marker:
.tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
@@ -69,19 +69,20 @@
bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
- andi. r10,r12,MSR_PR
mr r10,r1
- addi r1,r1,-INT_FRAME_SIZE
- beq- 1f
ld r1,PACAKSAVE(r13)
-1: std r10,0(r1)
+ std r10,0(r1)
std r11,_NIP(r1)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
- beq 2f /* if from kernel mode */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+START_BTB_FLUSH_SECTION
+ BTB_FLUSH(r10)
+END_BTB_FLUSH_SECTION
+#endif
ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
-2: std r2,GPR2(r1)
+ std r2,GPR2(r1)
std r3,GPR3(r1)
mfcr r2
std r4,GPR4(r1)
@@ -112,16 +113,18 @@
addi r9,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2)
std r11,-16(r9) /* "regshere" marker */
+
+ kuap_check_amr r10, r11
+
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
- beq 33f
- /* if from user, see if there are any DTL entries to process */
+ /* see if there are any DTL entries to process */
ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
ld r11,PACA_DTL_RIDX(r13) /* get log read index */
addi r10,r10,LPPACA_DTLIDX
LDX_BE r10,0,r10 /* get log write index */
- cmpd cr1,r11,r10
- beq+ cr1,33f
+ cmpd r11,r10
+ beq+ 33f
bl accumulate_stolen_time
REST_GPR(0,r1)
REST_4GPRS(3,r1)
@@ -158,7 +161,7 @@
li r10,IRQS_ENABLED
std r10,SOFTE(r1)
- CURRENT_THREAD_INFO(r11, r1)
+ ld r11, PACA_THREAD_INFO(r13)
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_DOTRACE
bne .Lsyscall_dotrace /* does not return */
@@ -171,9 +174,9 @@
* based on caller's run-mode / personality.
*/
ld r11,SYS_CALL_TABLE@toc(2)
- andi. r10,r10,_TIF_32BIT
+ andis. r10,r10,_TIF_32BIT@h
beq 15f
- addi r11,r11,8 /* use 32-bit syscall entries */
+ ld r11,COMPAT_SYS_CALL_TABLE@toc(2)
clrldi r3,r3,32
clrldi r4,r4,32
clrldi r5,r5,32
@@ -181,7 +184,7 @@
clrldi r7,r7,32
clrldi r8,r8,32
15:
- slwi r0,r0,4
+ slwi r0,r0,3
barrier_nospec_asm
/*
@@ -195,6 +198,7 @@
mtctr r12
bctrl /* Call handler */
+ /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
.Lsyscall_exit:
std r3,RESULT(r1)
@@ -205,14 +209,9 @@
ld r3,RESULT(r1)
#endif
- CURRENT_THREAD_INFO(r12, r1)
+ ld r12, PACA_THREAD_INFO(r13)
ld r8,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3S
- /* No MSR:RI on BookE */
- andi. r10,r8,MSR_RI
- beq- .Lunrecov_restore
-#endif
/*
* This is a few instructions into the actual syscall exit path (which actually
@@ -228,18 +227,14 @@
/*
* Disable interrupts so current_thread_info()->flags can't change,
* and so that we don't get interrupted after loading SRR0/1.
+ *
+ * Leave MSR_RI enabled for now, because with THREAD_INFO_IN_TASK we
+ * could fault on the load of the TI_FLAGS below.
*/
#ifdef CONFIG_PPC_BOOK3E
wrteei 0
#else
- /*
- * For performance reasons we clear RI the same time that we
- * clear EE. We only need to clear RI just before we restore r13
- * below, but batching it with EE saves us one expensive mtmsrd call.
- * We have to be careful to restore RI if we branch anywhere from
- * here (eg syscall_exit_work).
- */
- li r11,0
+ li r11,MSR_RI
mtmsrd r11,1
#endif /* CONFIG_PPC_BOOK3E */
@@ -255,15 +250,7 @@
bne 3f
#endif
2: addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
- li r10,MSR_RI
- mtmsrd r10,1 /* Restore RI */
-#endif
bl restore_math
-#ifdef CONFIG_PPC_BOOK3S
- li r11,0
- mtmsrd r11,1
-#endif
ld r8,_MSR(r1)
ld r3,RESULT(r1)
li r11,-MAX_ERRNO
@@ -279,6 +266,18 @@
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
+ kuap_check_amr r10, r11
+
+#ifdef CONFIG_PPC_BOOK3S
+ /*
+ * Clear MSR_RI, MSR_EE is already and remains disabled. We could do
+ * this later, but testing shows that doing it here causes less slow
+ * down than doing it closer to the rfid.
+ */
+ li r11,0
+ mtmsrd r11,1
+#endif
+
beq- 1f
ACCOUNT_CPU_USER_EXIT(r13, r11, r12)
@@ -286,6 +285,14 @@
HMT_MEDIUM_LOW
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ std r8, PACATMSCRATCH(r13)
+#endif
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
ld r2,GPR2(r1)
ld r1,GPR1(r1)
@@ -296,8 +303,10 @@
RFI_TO_USER
b . /* prevent speculative execution */
- /* exit to kernel */
-1: ld r2,GPR2(r1)
+1: /* exit to kernel */
+ kuap_restore_amr r2
+
+ ld r2,GPR2(r1)
ld r1,GPR1(r1)
mtlr r4
mtcr r5
@@ -336,7 +345,7 @@
/* Repopulate r9 and r10 for the syscall path */
addi r9,r1,STACK_FRAME_OVERHEAD
- CURRENT_THREAD_INFO(r10, r1)
+ ld r10, PACA_THREAD_INFO(r13)
ld r10,TI_FLAGS(r10)
cmpldi r0,NR_syscalls
@@ -351,10 +360,6 @@
b .Lsyscall_exit
.Lsyscall_exit_work:
-#ifdef CONFIG_PPC_BOOK3S
- li r10,MSR_RI
- mtmsrd r10,1 /* Restore RI */
-#endif
/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
If TIF_NOERROR is set, just save r3 as it is. */
@@ -386,10 +391,9 @@
4: /* Anything else left to do? */
BEGIN_FTR_SECTION
- lis r3,INIT_PPR@highest /* Set thread.ppr = 3 */
- ld r10,PACACURRENT(r13)
+ lis r3,DEFAULT_PPR@highest /* Set default PPR */
sldi r3,r3,32 /* bits 11-13 are used for ppr */
- std r3,TASKTHREADPPR(r10)
+ std r3,_PPR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
@@ -474,6 +478,11 @@
bl sys_clone
b .Lsyscall_exit
+_GLOBAL(ppc_clone3)
+ bl save_nvgprs
+ bl sys_clone3
+ b .Lsyscall_exit
+
_GLOBAL(ppc32_swapcontext)
bl save_nvgprs
bl compat_sys_swapcontext
@@ -528,6 +537,7 @@
/* Save LR into r9 */
mflr r9
+ // Flush the link stack
.rept 64
bl .+4
.endr
@@ -537,6 +547,11 @@
.balign 32
/* Restore LR */
1: mtlr r9
+
+ // If we're just flushing the link stack, return here
+3: nop
+ patch_site 3b patch__flush_link_stack_return
+
li r9,0x7fff
mtctr r9
@@ -589,6 +604,8 @@
std r23,_CCR(r1)
std r1,KSP(r3) /* Set old stack pointer */
+ kuap_check_amr r9, r10
+
FLUSH_COUNT_CACHE
/*
@@ -624,6 +641,10 @@
addi r6,r4,-THREAD /* Convert THREAD to 'current' */
std r6,PACACURRENT(r13) /* Set new 'current' */
+#if defined(CONFIG_STACKPROTECTOR)
+ ld r6, TASK_CANARY(r6)
+ std r6, PACA_CANARY(r13)
+#endif
ld r8,KSP(r4) /* new stack pointer */
#ifdef CONFIG_PPC_BOOK3S_64
@@ -672,13 +693,15 @@
isync
slbie r6
+BEGIN_FTR_SECTION
slbie r6 /* Workaround POWER5 < DD2.1 issue */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
slbmte r7,r0
isync
2:
#endif /* CONFIG_PPC_BOOK3S_64 */
- CURRENT_THREAD_INFO(r7, r8) /* base of new stack */
+ clrrdi r7, r8, THREAD_SHIFT /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
because we don't need to leave the 288-byte ABI gap at the
top of the kernel stack. */
@@ -729,7 +752,7 @@
mtmsrd r10,1 /* Update machine state */
#endif /* CONFIG_PPC_BOOK3E */
- CURRENT_THREAD_INFO(r9, r1)
+ ld r9, PACA_THREAD_INFO(r13)
ld r3,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3E
ld r10,PACACURRENT(r13)
@@ -840,13 +863,7 @@
* sure we are soft-disabled first and reconcile irq state.
*/
RECONCILE_IRQ_STATE(r3,r4)
-1: bl preempt_schedule_irq
-
- /* Re-test flags and eventually loop */
- CURRENT_THREAD_INFO(r9, r1)
- ld r4,TI_FLAGS(r9)
- andi. r0,r4,_TIF_NEED_RESCHED
- bne 1b
+ bl preempt_schedule_irq
/*
* arch_local_irq_restore() from preempt_schedule_irq above may
@@ -931,17 +948,13 @@
ld r4,_XER(r1)
mtspr SPRN_XER,r4
+ kuap_check_amr r5, r6
+
REST_8GPRS(5, r1)
andi. r0,r3,MSR_RI
beq- .Lunrecov_restore
- /* Load PPR from thread struct before we clear MSR:RI */
-BEGIN_FTR_SECTION
- ld r2,PACACURRENT(r13)
- ld r2,TASKTHREADPPR(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
/*
* Clear RI before restoring r13. If we are returning to
* userspace and we take an exception after restoring r13,
@@ -962,11 +975,17 @@
andi. r0,r3,MSR_PR
beq 1f
BEGIN_FTR_SECTION
- mtspr SPRN_PPR,r2 /* Restore PPR */
+ /* Restore PPR */
+ ld r2,_PPR(r1)
+ mtspr SPRN_PPR,r2
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
REST_GPR(13, r1)
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
mtspr SPRN_SRR1,r3
ld r2,_CCR(r1)
@@ -989,9 +1008,19 @@
ld r2,_NIP(r1)
mtspr SPRN_SRR0,r2
+ /*
+ * Leaving a stale exception_marker on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
+ */
+ li r2,0
+ std r2,STACK_FRAME_OVERHEAD-16(r1)
+
ld r0,GPR0(r1)
ld r2,GPR2(r1)
ld r3,GPR3(r1)
+
+ kuap_restore_amr r4
+
ld r4,GPR4(r1)
ld r1,GPR1(r1)
RFI_TO_KERNEL
@@ -1118,7 +1147,7 @@
_GLOBAL(enter_rtas)
mflr r0
std r0,16(r1)
- stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */
+ stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space. */
/* Because RTAS is running in 32b mode, it clobbers the high order half
* of all registers that it saves. We therefore save those registers
@@ -1250,7 +1279,7 @@
ld r8,_DSISR(r1)
mtdsisr r8
- addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */
+ addi r1,r1,SWITCH_FRAME_SIZE /* Unstack our frame */
ld r0,16(r1) /* get return address */
mtlr r0
@@ -1261,7 +1290,7 @@
_GLOBAL(enter_prom)
mflr r0
std r0,16(r1)
- stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
+ stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */
/* Because PROM is running in 32b mode, it clobbers the high order half
* of all registers that it saves. We therefore save those registers
@@ -1318,8 +1347,8 @@
REST_10GPRS(22, r1)
ld r4,_CCR(r1)
mtcr r4
-
- addi r1,r1,PROM_FRAME_SIZE
+
+ addi r1,r1,SWITCH_FRAME_SIZE
ld r0,16(r1)
mtlr r0
blr
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
index 52ca247..69a9125 100644
--- a/arch/powerpc/kernel/epapr_hcalls.S
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2012 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/threads.h>
@@ -21,10 +17,9 @@
#ifndef CONFIG_PPC64
/* epapr_ev_idle() was derived from e500_idle() */
_GLOBAL(epapr_ev_idle)
- CURRENT_THREAD_INFO(r3, r1)
- PPC_LL r4, TI_LOCAL_FLAGS(r3) /* set napping bit */
+ PPC_LL r4, TI_LOCAL_FLAGS(r2) /* set napping bit */
ori r4, r4,_TLF_NAPPING /* so when we take an exception */
- PPC_STL r4, TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+ PPC_STL r4, TI_LOCAL_FLAGS(r2) /* it will return to our caller */
wrteei 1
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 59e4ba7..9d32158 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* ePAPR para-virtualization support.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
* Copyright (C) 2012 Freescale Semiconductor, Inc.
*/
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 6d6e144..829950b 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Boot code and exception vectors for Book3E processors
*
* Copyright (C) 2007 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/threads.h>
@@ -77,17 +73,6 @@
andi. r3,r3,MSR_PR
bnelr
- /* Copy info into temporary exception thread info */
- ld r11,PACAKSAVE(r13)
- CURRENT_THREAD_INFO(r11, r11)
- CURRENT_THREAD_INFO(r12, r1)
- ld r10,TI_FLAGS(r11)
- std r10,TI_FLAGS(r12)
- ld r10,TI_PREEMPT(r11)
- std r10,TI_PREEMPT(r12)
- ld r10,TI_TASK(r11)
- std r10,TI_TASK(r12)
-
/*
* Advance to the next TLB exception frame for handler
* types that don't do it automatically.
@@ -296,7 +281,8 @@
andi. r10,r11,MSR_PR; /* save stack pointer */ \
beq 1f; /* branch around if supervisor */ \
ld r1,PACAKSAVE(r13); /* get kernel stack coming from usr */\
-1: cmpdi cr1,r1,0; /* check if SP makes sense */ \
+1: type##_BTB_FLUSH \
+ cmpdi cr1,r1,0; /* check if SP makes sense */ \
bge- cr1,exc_##n##_bad_stack;/* bad stack (TODO: out of line) */ \
mfspr r10,SPRN_##type##_SRR0; /* read SRR0 before touching stack */
@@ -328,6 +314,30 @@
#define SPRN_MC_SRR0 SPRN_MCSRR0
#define SPRN_MC_SRR1 SPRN_MCSRR1
+#ifdef CONFIG_PPC_FSL_BOOK3E
+#define GEN_BTB_FLUSH \
+ START_BTB_FLUSH_SECTION \
+ beq 1f; \
+ BTB_FLUSH(r10) \
+ 1: \
+ END_BTB_FLUSH_SECTION
+
+#define CRIT_BTB_FLUSH \
+ START_BTB_FLUSH_SECTION \
+ BTB_FLUSH(r10) \
+ END_BTB_FLUSH_SECTION
+
+#define DBG_BTB_FLUSH CRIT_BTB_FLUSH
+#define MC_BTB_FLUSH CRIT_BTB_FLUSH
+#define GDBELL_BTB_FLUSH GEN_BTB_FLUSH
+#else
+#define GEN_BTB_FLUSH
+#define CRIT_BTB_FLUSH
+#define DBG_BTB_FLUSH
+#define MC_BTB_FLUSH
+#define GDBELL_BTB_FLUSH
+#endif
+
#define NORMAL_EXCEPTION_PROLOG(n, intnum, addition) \
EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n))
@@ -480,7 +490,7 @@
* interrupts happen before the wait instruction.
*/
#define CHECK_NAPPING() \
- CURRENT_THREAD_INFO(r11, r1); \
+ ld r11, PACA_THREAD_INFO(r13); \
ld r10,TI_LOCAL_FLAGS(r11); \
andi. r9,r10,_TLF_NAPPING; \
beq+ 1f; \
@@ -740,12 +750,14 @@
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
-#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
-#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+ cmpld cr0, r10, r14
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, __end_interrupts)
+ cmpld cr1, r10, r14
+#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -810,12 +822,14 @@
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
-#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
-#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+ cmpld cr0, r10, r14
+ LOAD_REG_IMMEDIATE_SYM(r14, r15,__end_interrupts)
+ cmpld cr1, r10, r14
+#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -1439,7 +1453,7 @@
a2_tlbinit_after_linear_map:
/* Now we branch the new virtual address mapped by this entry */
- LOAD_REG_IMMEDIATE(r3,1f)
+ LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
mtctr r3
bctr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2d8fc8c..d0018dd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -19,6 +19,678 @@
#include <asm/cpuidle.h>
#include <asm/head-64.h>
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
+
+/* PACA save area offsets (exgen, exmc, etc) */
+#define EX_R9 0
+#define EX_R10 8
+#define EX_R11 16
+#define EX_R12 24
+#define EX_R13 32
+#define EX_DAR 40
+#define EX_DSISR 48
+#define EX_CCR 52
+#define EX_CFAR 56
+#define EX_PPR 64
+#if defined(CONFIG_RELOCATABLE)
+#define EX_CTR 72
+.if EX_SIZE != 10
+ .error "EX_SIZE is wrong"
+.endif
+#else
+.if EX_SIZE != 9
+ .error "EX_SIZE is wrong"
+.endif
+#endif
+
+/*
+ * Following are fixed section helper macros.
+ *
+ * EXC_REAL_BEGIN/END - real, unrelocated exception vectors
+ * EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors
+ * TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these)
+ * TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use)
+ * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated
+ * EXC_COMMON - After switching to virtual, relocated mode.
+ */
+
+#define EXC_REAL_BEGIN(name, start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_REAL_END(name, start, size) \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_VIRT_BEGIN(name, start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_VIRT_END(name, start, size) \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_COMMON_BEGIN(name) \
+ USE_TEXT_SECTION(); \
+ .balign IFETCH_ALIGN_BYTES; \
+ .global name; \
+ _ASM_NOKPROBE_SYMBOL(name); \
+ DEFINE_FIXED_SYMBOL(name); \
+name:
+
+#define TRAMP_REAL_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(real_trampolines, name)
+
+#define TRAMP_VIRT_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#define TRAMP_KVM_BEGIN(name) \
+ TRAMP_VIRT_BEGIN(name)
+#else
+#define TRAMP_KVM_BEGIN(name)
+#endif
+
+#define EXC_REAL_NONE(start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
+
+#define EXC_VIRT_NONE(start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size)
+
+/*
+ * We're short on space and time in the exception prolog, so we can't
+ * use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
+ * Instead we get the base of the kernel from paca->kernelbase and or in the low
+ * part of label. This requires that the label be within 64KB of kernelbase, and
+ * that kernelbase be 64K aligned.
+ */
+#define LOAD_HANDLER(reg, label) \
+ ld reg,PACAKBASE(r13); /* get high part of &label */ \
+ ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label)
+
+#define __LOAD_HANDLER(reg, label) \
+ ld reg,PACAKBASE(r13); \
+ ori reg,reg,(ABS_ADDR(label))@l
+
+/*
+ * Branches from unrelocated code (e.g., interrupts) to labels outside
+ * head-y require >64K offsets.
+ */
+#define __LOAD_FAR_HANDLER(reg, label) \
+ ld reg,PACAKBASE(r13); \
+ ori reg,reg,(ABS_ADDR(label))@l; \
+ addis reg,reg,(ABS_ADDR(label))@h
+
+/* Exception register prefixes */
+#define EXC_HV_OR_STD 2 /* depends on HVMODE */
+#define EXC_HV 1
+#define EXC_STD 0
+
+#if defined(CONFIG_RELOCATABLE)
+/*
+ * If we support interrupts with relocation on AND we're a relocatable kernel,
+ * we need to use CTR to get to the 2nd level handler. So, save/restore it
+ * when required.
+ */
+#define SAVE_CTR(reg, area) mfctr reg ; std reg,area+EX_CTR(r13)
+#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13)
+#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg
+#else
+/* ...else CTR is unused and in register. */
+#define SAVE_CTR(reg, area)
+#define GET_CTR(reg, area) mfctr reg
+#define RESTORE_CTR(reg, area)
+#endif
+
+/*
+ * PPR save/restore macros used in exceptions-64s.S
+ * Used for P7 or later processors
+ */
+#define SAVE_PPR(area, ra) \
+BEGIN_FTR_SECTION_NESTED(940) \
+ ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \
+ std ra,_PPR(r1); \
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
+
+#define RESTORE_PPR_PACA(area, ra) \
+BEGIN_FTR_SECTION_NESTED(941) \
+ ld ra,area+EX_PPR(r13); \
+ mtspr SPRN_PPR,ra; \
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
+
+/*
+ * Get an SPR into a register if the CPU has the given feature
+ */
+#define OPT_GET_SPR(ra, spr, ftr) \
+BEGIN_FTR_SECTION_NESTED(943) \
+ mfspr ra,spr; \
+END_FTR_SECTION_NESTED(ftr,ftr,943)
+
+/*
+ * Set an SPR from a register if the CPU has the given feature
+ */
+#define OPT_SET_SPR(ra, spr, ftr) \
+BEGIN_FTR_SECTION_NESTED(943) \
+ mtspr spr,ra; \
+END_FTR_SECTION_NESTED(ftr,ftr,943)
+
+/*
+ * Save a register to the PACA if the CPU has the given feature
+ */
+#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \
+BEGIN_FTR_SECTION_NESTED(943) \
+ std ra,offset(r13); \
+END_FTR_SECTION_NESTED(ftr,ftr,943)
+
+/*
+ * Branch to label using its 0xC000 address. This results in instruction
+ * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
+ * on using mtmsr rather than rfid.
+ *
+ * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than
+ * load KBASE for a slight optimisation.
+ */
+#define BRANCH_TO_C000(reg, label) \
+ __LOAD_FAR_HANDLER(reg, label); \
+ mtctr reg; \
+ bctr
+
+.macro INT_KVM_HANDLER name, vec, hsrr, area, skip
+ TRAMP_KVM_BEGIN(\name\()_kvm)
+ KVM_HANDLER \vec, \hsrr, \area, \skip
+.endm
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * If hv is possible, interrupts come into to the hv version
+ * of the kvmppc_interrupt code, which then jumps to the PR handler,
+ * kvmppc_interrupt_pr, if the guest is a PR guest.
+ */
+#define kvmppc_interrupt kvmppc_interrupt_hv
+#else
+#define kvmppc_interrupt kvmppc_interrupt_pr
+#endif
+
+.macro KVMTEST name, hsrr, n
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,0
+ bne \name\()_kvm
+.endm
+
+.macro KVM_HANDLER vec, hsrr, area, skip
+ .if \skip
+ cmpwi r10,KVM_GUEST_MODE_SKIP
+ beq 89f
+ .else
+BEGIN_FTR_SECTION_NESTED(947)
+ ld r10,\area+EX_CFAR(r13)
+ std r10,HSTATE_CFAR(r13)
+END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
+ .endif
+
+BEGIN_FTR_SECTION_NESTED(948)
+ ld r10,\area+EX_PPR(r13)
+ std r10,HSTATE_PPR(r13)
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
+ ld r10,\area+EX_R10(r13)
+ std r12,HSTATE_SCRATCH0(r13)
+ sldi r12,r9,32
+ /* HSRR variants have the 0x2 bit added to their trap number */
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ ori r12,r12,(\vec + 0x2)
+ FTR_SECTION_ELSE
+ ori r12,r12,(\vec)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ ori r12,r12,(\vec + 0x2)
+ .else
+ ori r12,r12,(\vec)
+ .endif
+
+#ifdef CONFIG_RELOCATABLE
+ /*
+ * KVM requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
+ * outside the head section. CONFIG_RELOCATABLE KVM expects CTR
+ * to be saved in HSTATE_SCRATCH1.
+ */
+ mfctr r9
+ std r9,HSTATE_SCRATCH1(r13)
+ __LOAD_FAR_HANDLER(r9, kvmppc_interrupt)
+ mtctr r9
+ ld r9,\area+EX_R9(r13)
+ bctr
+#else
+ ld r9,\area+EX_R9(r13)
+ b kvmppc_interrupt
+#endif
+
+
+ .if \skip
+89: mtocrf 0x80,r9
+ ld r9,\area+EX_R9(r13)
+ ld r10,\area+EX_R10(r13)
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ b kvmppc_skip_Hinterrupt
+ FTR_SECTION_ELSE
+ b kvmppc_skip_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ b kvmppc_skip_Hinterrupt
+ .else
+ b kvmppc_skip_interrupt
+ .endif
+ .endif
+.endm
+
+#else
+.macro KVMTEST name, hsrr, n
+.endm
+.macro KVM_HANDLER name, vec, hsrr, area, skip
+.endm
+#endif
+
+.macro INT_SAVE_SRR_AND_JUMP label, hsrr, set_ri
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ .if ! \set_ri
+ xori r10,r10,MSR_RI /* Clear MSR_RI */
+ .endif
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ mtspr SPRN_HSRR1,r10
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ mtspr SPRN_SRR1,r10
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ mtspr SPRN_HSRR1,r10
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ mtspr SPRN_SRR1,r10
+ .endif
+ LOAD_HANDLER(r10, \label\())
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mtspr SPRN_HSRR0,r10
+ HRFI_TO_KERNEL
+ FTR_SECTION_ELSE
+ mtspr SPRN_SRR0,r10
+ RFI_TO_KERNEL
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mtspr SPRN_HSRR0,r10
+ HRFI_TO_KERNEL
+ .else
+ mtspr SPRN_SRR0,r10
+ RFI_TO_KERNEL
+ .endif
+ b . /* prevent speculative execution */
+.endm
+
+/* INT_SAVE_SRR_AND_JUMP works for real or virt, this is faster but virt only */
+.macro INT_VIRT_SAVE_SRR_AND_JUMP label, hsrr
+#ifdef CONFIG_RELOCATABLE
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ .endif
+ LOAD_HANDLER(r12, \label\())
+ mtctr r12
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r12,SPRN_SRR1 /* and HSRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .else
+ mfspr r12,SPRN_SRR1 /* and HSRR1 */
+ .endif
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set RI (EE=0) */
+ bctr
+#else
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ .endif
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set RI (EE=0) */
+ b \label
+#endif
+.endm
+
+/*
+ * This is the BOOK3S interrupt entry code macro.
+ *
+ * This can result in one of several things happening:
+ * - Branch to the _common handler, relocated, in virtual mode.
+ * These are normal interrupts (synchronous and asynchronous) handled by
+ * the kernel.
+ * - Branch to KVM, relocated but real mode interrupts remain in real mode.
+ * These occur when HSTATE_IN_GUEST is set. The interrupt may be caused by
+ * / intended for host or guest kernel, but KVM must always be involved
+ * because the machine state is set for guest execution.
+ * - Branch to the masked handler, unrelocated.
+ * These occur when maskable asynchronous interrupts are taken with the
+ * irq_soft_mask set.
+ * - Branch to an "early" handler in real mode but relocated.
+ * This is done if early=1. MCE and HMI use these to handle errors in real
+ * mode.
+ * - Fall through and continue executing in real, unrelocated mode.
+ * This is done if early=2.
+ */
+.macro INT_HANDLER name, vec, ool=0, early=0, virt=0, hsrr=0, area=PACA_EXGEN, ri=1, dar=0, dsisr=0, bitmask=0, kvm=0
+ SET_SCRATCH0(r13) /* save r13 */
+ GET_PACA(r13)
+ std r9,\area\()+EX_R9(r13) /* save r9 */
+ OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
+ HMT_MEDIUM
+ std r10,\area\()+EX_R10(r13) /* save r10 - r12 */
+ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+ .if \ool
+ .if !\virt
+ b tramp_real_\name
+ .pushsection .text
+ TRAMP_REAL_BEGIN(tramp_real_\name)
+ .else
+ b tramp_virt_\name
+ .pushsection .text
+ TRAMP_VIRT_BEGIN(tramp_virt_\name)
+ .endif
+ .endif
+
+ OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
+ OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
+ INTERRUPT_TO_KERNEL
+ SAVE_CTR(r10, \area\())
+ mfcr r9
+ .if \kvm
+ KVMTEST \name \hsrr \vec
+ .endif
+ .if \bitmask
+ lbz r10,PACAIRQSOFTMASK(r13)
+ andi. r10,r10,\bitmask
+ /* Associate vector numbers with bits in paca->irq_happened */
+ .if \vec == 0x500 || \vec == 0xea0
+ li r10,PACA_IRQ_EE
+ .elseif \vec == 0x900
+ li r10,PACA_IRQ_DEC
+ .elseif \vec == 0xa00 || \vec == 0xe80
+ li r10,PACA_IRQ_DBELL
+ .elseif \vec == 0xe60
+ li r10,PACA_IRQ_HMI
+ .elseif \vec == 0xf00
+ li r10,PACA_IRQ_PMI
+ .else
+ .abort "Bad maskable vector"
+ .endif
+
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ bne masked_Hinterrupt
+ FTR_SECTION_ELSE
+ bne masked_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ bne masked_Hinterrupt
+ .else
+ bne masked_interrupt
+ .endif
+ .endif
+
+ std r11,\area\()+EX_R11(r13)
+ std r12,\area\()+EX_R12(r13)
+
+ /*
+ * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
+ * because a d-side MCE will clobber those registers so is
+ * not recoverable if they are live.
+ */
+ GET_SCRATCH0(r10)
+ std r10,\area\()+EX_R13(r13)
+ .if \dar
+ .if \hsrr
+ mfspr r10,SPRN_HDAR
+ .else
+ mfspr r10,SPRN_DAR
+ .endif
+ std r10,\area\()+EX_DAR(r13)
+ .endif
+ .if \dsisr
+ .if \hsrr
+ mfspr r10,SPRN_HDSISR
+ .else
+ mfspr r10,SPRN_DSISR
+ .endif
+ stw r10,\area\()+EX_DSISR(r13)
+ .endif
+
+ .if \early == 2
+ /* nothing more */
+ .elseif \early
+ mfctr r10 /* save ctr, even for !RELOCATABLE */
+ BRANCH_TO_C000(r11, \name\()_early_common)
+ .elseif !\virt
+ INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri
+ .else
+ INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr
+ .endif
+ .if \ool
+ .popsection
+ .endif
+.endm
+
+/*
+ * On entry r13 points to the paca, r9-r13 are saved in the paca,
+ * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
+ * SRR1, and relocation is on.
+ *
+ * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
+ * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
+ */
+.macro INT_COMMON vec, area, stack, kaup, reconcile, dar, dsisr
+ .if \stack
+ andi. r10,r12,MSR_PR /* See if coming from user */
+ mr r10,r1 /* Save r1 */
+ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */
+ beq- 100f
+ ld r1,PACAKSAVE(r13) /* kernel stack to use */
+100: tdgei r1,-INT_FRAME_SIZE /* trap if r1 is in userspace */
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
+ .endif
+
+ std r9,_CCR(r1) /* save CR in stackframe */
+ std r11,_NIP(r1) /* save SRR0 in stackframe */
+ std r12,_MSR(r1) /* save SRR1 in stackframe */
+ std r10,0(r1) /* make stack chain pointer */
+ std r0,GPR0(r1) /* save r0 in stackframe */
+ std r10,GPR1(r1) /* save r1 in stackframe */
+
+ .if \stack
+ .if \kaup
+ kuap_save_amr_and_lock r9, r10, cr1, cr0
+ .endif
+ beq 101f /* if from kernel mode */
+ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
+ SAVE_PPR(\area, r9)
+101:
+ .else
+ .if \kaup
+ kuap_save_amr_and_lock r9, r10, cr1
+ .endif
+ .endif
+
+ /* Save original regs values from save area to stack frame. */
+ ld r9,\area+EX_R9(r13) /* move r9, r10 to stackframe */
+ ld r10,\area+EX_R10(r13)
+ std r9,GPR9(r1)
+ std r10,GPR10(r1)
+ ld r9,\area+EX_R11(r13) /* move r11 - r13 to stackframe */
+ ld r10,\area+EX_R12(r13)
+ ld r11,\area+EX_R13(r13)
+ std r9,GPR11(r1)
+ std r10,GPR12(r1)
+ std r11,GPR13(r1)
+ .if \dar
+ .if \dar == 2
+ ld r10,_NIP(r1)
+ .else
+ ld r10,\area+EX_DAR(r13)
+ .endif
+ std r10,_DAR(r1)
+ .endif
+ .if \dsisr
+ .if \dsisr == 2
+ ld r10,_MSR(r1)
+ lis r11,DSISR_SRR1_MATCH_64S@h
+ and r10,r10,r11
+ .else
+ lwz r10,\area+EX_DSISR(r13)
+ .endif
+ std r10,_DSISR(r1)
+ .endif
+BEGIN_FTR_SECTION_NESTED(66)
+ ld r10,\area+EX_CFAR(r13)
+ std r10,ORIG_GPR3(r1)
+END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
+ GET_CTR(r10, \area)
+ std r10,_CTR(r1)
+ std r2,GPR2(r1) /* save r2 in stackframe */
+ SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
+ SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */
+ mflr r9 /* Get LR, later save to stack */
+ ld r2,PACATOC(r13) /* get kernel TOC into r2 */
+ std r9,_LINK(r1)
+ lbz r10,PACAIRQSOFTMASK(r13)
+ mfspr r11,SPRN_XER /* save XER in stackframe */
+ std r10,SOFTE(r1)
+ std r11,_XER(r1)
+ li r9,(\vec)+1
+ std r9,_TRAP(r1) /* set trap number */
+ li r10,0
+ ld r11,exception_marker@toc(r2)
+ std r10,RESULT(r1) /* clear regs->result */
+ std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */
+
+ .if \stack
+ ACCOUNT_STOLEN_TIME
+ .endif
+
+ .if \reconcile
+ RECONCILE_IRQ_STATE(r10, r11)
+ .endif
+.endm
+
+/*
+ * Restore all registers including H/SRR0/1 saved in a stack frame of a
+ * standard exception.
+ */
+.macro EXCEPTION_RESTORE_REGS hsrr
+ /* Move original SRR0 and SRR1 into the respective regs */
+ ld r9,_MSR(r1)
+ .if \hsrr == EXC_HV_OR_STD
+ .error "EXC_HV_OR_STD Not implemented for EXCEPTION_RESTORE_REGS"
+ .endif
+ .if \hsrr
+ mtspr SPRN_HSRR1,r9
+ .else
+ mtspr SPRN_SRR1,r9
+ .endif
+ ld r9,_NIP(r1)
+ .if \hsrr
+ mtspr SPRN_HSRR0,r9
+ .else
+ mtspr SPRN_SRR0,r9
+ .endif
+ ld r9,_CTR(r1)
+ mtctr r9
+ ld r9,_XER(r1)
+ mtxer r9
+ ld r9,_LINK(r1)
+ mtlr r9
+ ld r9,_CCR(r1)
+ mtcr r9
+ REST_8GPRS(2, r1)
+ REST_4GPRS(10, r1)
+ REST_GPR(0, r1)
+ /* restore original r1. */
+ ld r1,GPR1(r1)
+.endm
+
+#define RUNLATCH_ON \
+BEGIN_FTR_SECTION \
+ ld r3, PACA_THREAD_INFO(r13); \
+ ld r4,TI_LOCAL_FLAGS(r3); \
+ andi. r0,r4,_TLF_RUNLATCH; \
+ beql ppc64_runlatch_on_trampoline; \
+END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
+
+/*
+ * When the idle code in power4_idle puts the CPU into NAP mode,
+ * it has to do so in a loop, and relies on the external interrupt
+ * and decrementer interrupt entry code to get it out of the loop.
+ * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
+ * to signal that it is in the loop and needs help to get out.
+ */
+#ifdef CONFIG_PPC_970_NAP
+#define FINISH_NAP \
+BEGIN_FTR_SECTION \
+ ld r11, PACA_THREAD_INFO(r13); \
+ ld r9,TI_LOCAL_FLAGS(r11); \
+ andi. r10,r9,_TLF_NAPPING; \
+ bnel power4_fixup_nap; \
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+#else
+#define FINISH_NAP
+#endif
+
+#define EXC_COMMON(name, realvec, hdlr) \
+ EXC_COMMON_BEGIN(name); \
+ INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
+ bl save_nvgprs; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ bl hdlr; \
+ b ret_from_except
+
+/*
+ * Like EXC_COMMON, but for exceptions that can occur in the idle task and
+ * therefore need the special idle handling (finish nap and runlatch)
+ */
+#define EXC_COMMON_ASYNC(name, realvec, hdlr) \
+ EXC_COMMON_BEGIN(name); \
+ INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
+ FINISH_NAP; \
+ RUNLATCH_ON; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ bl hdlr; \
+ b ret_from_except_lite
+
/*
* There are a few constraints to be concerned with.
@@ -68,6 +740,14 @@
OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000)
OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900)
OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000)
+
+#ifdef CONFIG_PPC_POWERNV
+ .globl start_real_trampolines
+ .globl end_real_trampolines
+ .globl start_virt_trampolines
+ .globl end_virt_trampolines
+#endif
+
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/*
* Data area reserved for FWNMI option.
@@ -98,6 +778,7 @@
EXC_VIRT_NONE(0x4000, 0x100)
+EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
#ifdef CONFIG_PPC_P7_NAP
/*
* If running native on arch 2.06 or later, check if we are waking up
@@ -105,55 +786,67 @@
* bits 46:47. A non-0 value indicates that we are coming from a power
* saving state. The idle wakeup handler initially runs in real mode,
* but we branch to the 0xc000... address so we can turn on relocation
- * with mtmsr.
+ * with mtmsrd later, after SPRs are restored.
+ *
+ * Careful to minimise cost for the fast path (idle wakeup) while
+ * also avoiding clobbering CFAR for the debug path (non-idle).
+ *
+ * For the idle wake case volatile registers can be clobbered, which
+ * is why we use those initially. If it turns out to not be an idle
+ * wake, carefully put everything back the way it was, so we can use
+ * common exception macros to handle it.
*/
-#define IDLETEST(n) \
- BEGIN_FTR_SECTION ; \
- mfspr r10,SPRN_SRR1 ; \
- rlwinm. r10,r10,47-31,30,31 ; \
- beq- 1f ; \
- cmpwi cr3,r10,2 ; \
- BRANCH_TO_C000(r10, system_reset_idle_common) ; \
-1: \
- KVMTEST_PR(n) ; \
- END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#else
-#define IDLETEST NOTEST
+BEGIN_FTR_SECTION
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
+ std r3,PACA_EXNMI+0*8(r13)
+ std r4,PACA_EXNMI+1*8(r13)
+ std r5,PACA_EXNMI+2*8(r13)
+ mfspr r3,SPRN_SRR1
+ mfocrf r4,0x80
+ rlwinm. r5,r3,47-31,30,31
+ bne+ system_reset_idle_wake
+ /* Not powersave wakeup. Restore regs for regular interrupt handler. */
+ mtocrf 0x80,r4
+ ld r3,PACA_EXNMI+0*8(r13)
+ ld r4,PACA_EXNMI+1*8(r13)
+ ld r5,PACA_EXNMI+2*8(r13)
+ GET_SCRATCH0(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif
-EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
- SET_SCRATCH0(r13)
+ INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0, kvm=1
/*
* MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
* being used, so a nested NMI exception would corrupt it.
+ *
+ * In theory, we should not enable relocation here if it was disabled
+ * in SRR1, because the MMU may not be configured to support it (e.g.,
+ * SLB may have been cleared). In practice, there should only be a few
+ * small windows where that's the case, and sreset is considered to
+ * be dangerous anyway.
*/
- EXCEPTION_PROLOG_NORI(PACA_EXNMI, system_reset_common, EXC_STD,
- IDLETEST, 0x100)
-
EXC_REAL_END(system_reset, 0x100, 0x100)
EXC_VIRT_NONE(0x4100, 0x100)
-TRAMP_KVM(PACA_EXNMI, 0x100)
+INT_KVM_HANDLER system_reset 0x100, EXC_STD, PACA_EXNMI, 0
#ifdef CONFIG_PPC_P7_NAP
-EXC_COMMON_BEGIN(system_reset_idle_common)
- mfspr r12,SPRN_SRR1
- b pnv_powersave_wakeup
+TRAMP_REAL_BEGIN(system_reset_idle_wake)
+ /* We are waking up from idle, so may clobber any volatile register */
+ cmpwi cr1,r5,2
+ bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */
+ BRANCH_TO_C000(r12, DOTSYM(idle_return_gpr_loss))
#endif
+#ifdef CONFIG_PPC_PSERIES
/*
- * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
- * the right thing. We do not want to reconcile because that goes
- * through irq tracing which we don't want in NMI.
- *
- * Save PACAIRQHAPPENED because some code will do a hard disable
- * (e.g., xmon). So we want to restore this back to where it was
- * when we return. DAR is unused in the stack, so save it there.
+ * Vectors for the FWNMI option. Share common code.
*/
-#define ADD_RECONCILE_NMI \
- li r10,IRQS_ALL_DISABLED; \
- stb r10,PACAIRQSOFTMASK(r13); \
- lbz r10,PACAIRQHAPPENED(r13); \
- std r10,_DAR(r1)
+TRAMP_REAL_BEGIN(system_reset_fwnmi)
+ /* See comment at system_reset exception, don't turn on RI */
+ INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0
+
+#endif /* CONFIG_PPC_PSERIES */
EXC_COMMON_BEGIN(system_reset_common)
/*
@@ -171,15 +864,27 @@
mr r10,r1
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
- EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100,
- system_reset, system_reset_exception,
- ADD_NVGPRS;ADD_RECONCILE_NMI)
+ INT_COMMON 0x100, PACA_EXNMI, 0, 1, 0, 0, 0
+ bl save_nvgprs
+ /*
+ * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
+ * the right thing. We do not want to reconcile because that goes
+ * through irq tracing which we don't want in NMI.
+ *
+ * Save PACAIRQHAPPENED because some code will do a hard disable
+ * (e.g., xmon). So we want to restore this back to where it was
+ * when we return. DAR is unused in the stack, so save it there.
+ */
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+ lbz r10,PACAIRQHAPPENED(r13)
+ std r10,_DAR(r1)
- /* This (and MCE) can be simplified with mtmsrd L=1 */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl system_reset_exception
+
/* Clear MSR_RI before setting SRR0 and SRR1. */
- li r0,MSR_RI
- mfmsr r9
- andc r9,r9,r0
+ li r9,0
mtmsrd r9,1
/*
@@ -197,68 +902,44 @@
ld r10,SOFTE(r1)
stb r10,PACAIRQSOFTMASK(r13)
- /*
- * Keep below code in synch with MACHINE_CHECK_HANDLER_WINDUP.
- * Should share common bits...
- */
-
- /* Move original SRR0 and SRR1 into the respective regs */
- ld r9,_MSR(r1)
- mtspr SPRN_SRR1,r9
- ld r3,_NIP(r1)
- mtspr SPRN_SRR0,r3
- ld r9,_CTR(r1)
- mtctr r9
- ld r9,_XER(r1)
- mtxer r9
- ld r9,_LINK(r1)
- mtlr r9
- REST_GPR(0, r1)
- REST_8GPRS(2, r1)
- REST_GPR(10, r1)
- ld r11,_CCR(r1)
- mtcr r11
- REST_GPR(11, r1)
- REST_2GPRS(12, r1)
- /* restore original r1. */
- ld r1,GPR1(r1)
+ EXCEPTION_RESTORE_REGS EXC_STD
RFI_TO_USER_OR_KERNEL
-#ifdef CONFIG_PPC_PSERIES
-/*
- * Vectors for the FWNMI option. Share common code.
- */
-TRAMP_REAL_BEGIN(system_reset_fwnmi)
- SET_SCRATCH0(r13) /* save r13 */
- /* See comment at system_reset exception */
- EXCEPTION_PROLOG_NORI(PACA_EXNMI, system_reset_common, EXC_STD,
- NOTEST, 0x100)
-#endif /* CONFIG_PPC_PSERIES */
-
EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
- /* This is moved out of line as it can be patched by FW, but
- * some code path might still want to branch into the original
- * vector
+ INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+ /*
+ * MSR_RI is not enabled, because PACA_EXMC is being used, so a
+ * nested machine check corrupts it. machine_check_common enables
+ * MSR_RI.
*/
- SET_SCRATCH0(r13) /* save r13 */
- EXCEPTION_PROLOG_0(PACA_EXMC)
-BEGIN_FTR_SECTION
- b machine_check_powernv_early
-FTR_SECTION_ELSE
- b machine_check_pSeries_0
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXC_REAL_END(machine_check, 0x200, 0x100)
EXC_VIRT_NONE(0x4200, 0x100)
-TRAMP_REAL_BEGIN(machine_check_powernv_early)
-BEGIN_FTR_SECTION
- EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
+
+#ifdef CONFIG_PPC_PSERIES
+TRAMP_REAL_BEGIN(machine_check_fwnmi)
+ /* See comment at machine_check exception, don't turn on RI */
+ INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+#endif
+
+INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1
+
+#define MACHINE_CHECK_HANDLER_WINDUP \
+ /* Clear MSR_RI before setting SRR0 and SRR1. */\
+ li r9,0; \
+ mtmsrd r9,1; /* Clear MSR_RI */ \
+ /* Decrement paca->in_mce now RI is clear. */ \
+ lhz r12,PACA_IN_MCE(r13); \
+ subi r12,r12,1; \
+ sth r12,PACA_IN_MCE(r13); \
+ EXCEPTION_RESTORE_REGS EXC_STD
+
+EXC_COMMON_BEGIN(machine_check_early_common)
+ mtctr r10 /* Restore ctr */
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+
/*
- * Register contents:
- * R13 = PACA
- * R9 = CR
- * Original R9 to R13 is saved on PACA_EXMC
- *
* Switch to mc_emergency stack and handle re-entrancy (we limit
* the nested MCE upto level 4 to avoid stack overflow).
* Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
@@ -279,124 +960,127 @@
* the machine check is handled then the idle wakeup code is called
* to restore state.
*/
- mr r11,r1 /* Save r1 */
lhz r10,PACA_IN_MCE(r13)
cmpwi r10,0 /* Are we in nested machine check */
- bne 0f /* Yes, we are. */
- /* First machine check entry */
- ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
-0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ cmpwi cr1,r10,MAX_MCE_DEPTH /* Are we at maximum nesting */
addi r10,r10,1 /* increment paca->in_mce */
sth r10,PACA_IN_MCE(r13)
- /* Limit nested MCE to level 4 to avoid stack overflow */
- cmpwi r10,MAX_MCE_DEPTH
- bgt 2f /* Check if we hit limit of 4 */
- std r11,GPR1(r1) /* Save r1 on the stack. */
- std r11,0(r1) /* make stack chain pointer */
- mfspr r11,SPRN_SRR0 /* Save SRR0 */
- std r11,_NIP(r1)
- mfspr r11,SPRN_SRR1 /* Save SRR1 */
- std r11,_MSR(r1)
- mfspr r11,SPRN_DAR /* Save DAR */
- std r11,_DAR(r1)
- mfspr r11,SPRN_DSISR /* Save DSISR */
- std r11,_DSISR(r1)
- std r9,_CCR(r1) /* Save CR in stackframe */
- /* Save r9 through r13 from EXMC save area to stack frame. */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
- mfmsr r11 /* get MSR value */
- ori r11,r11,MSR_ME /* turn on ME bit */
- ori r11,r11,MSR_RI /* turn on RI bit */
- LOAD_HANDLER(r12, machine_check_handle_early)
-1: mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r11
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-2:
- /* Stack overflow. Stay on emergency stack and panic.
- * Keep the ME bit off while panic-ing, so that if we hit
- * another machine check we checkstop.
- */
- addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */
- ld r11,PACAKMSR(r13)
- LOAD_HANDLER(r12, unrecover_mce)
- li r10,MSR_ME
- andc r11,r11,r10 /* Turn off MSR_ME */
- b 1b
- b . /* prevent speculative execution */
+
+ mr r10,r1 /* Save r1 */
+ bne 1f
+ /* First machine check entry */
+ ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
+1: /* Limit nested MCE to level 4 to avoid stack overflow */
+ bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+
+ /* We don't touch AMR here, we never go to virtual mode */
+ INT_COMMON 0x200, PACA_EXMC, 0, 0, 0, 1, 1
+
+BEGIN_FTR_SECTION
+ bl enable_machine_check
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+ li r10,MSR_RI
+ mtmsrd r10,1
-TRAMP_REAL_BEGIN(machine_check_pSeries)
- .globl machine_check_fwnmi
-machine_check_fwnmi:
- SET_SCRATCH0(r13) /* save r13 */
- EXCEPTION_PROLOG_0(PACA_EXMC)
-machine_check_pSeries_0:
- EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_early
+ std r3,RESULT(r1) /* Save result */
+ ld r12,_MSR(r1)
+
+#ifdef CONFIG_PPC_P7_NAP
/*
- * MSR_RI is not enabled, because PACA_EXMC is being used, so a
- * nested machine check corrupts it. machine_check_common enables
- * MSR_RI.
+ * Check if thread was in power saving mode. We come here when any
+ * of the following is true:
+ * a. thread wasn't in power saving mode
+ * b. thread was in power saving mode with no state loss,
+ * supervisor state loss or hypervisor state loss.
+ *
+ * Go back to nap/sleep/winkle mode again if (b) is true.
*/
- EXCEPTION_PROLOG_2_NORI(machine_check_common, EXC_STD)
+BEGIN_FTR_SECTION
+ rlwinm. r11,r12,47-31,30,31
+ bne machine_check_idle_common
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif
-TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+ /*
+ * Check if we are coming from guest. If yes, then run the normal
+ * exception handler which will take the
+ * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+ * to guest.
+ */
+ lbz r11,HSTATE_IN_GUEST(r13)
+ cmpwi r11,0 /* Check if coming from guest */
+ bne mce_deliver /* continue if we are. */
+#endif
+
+ /*
+ * Check if we are coming from userspace. If yes, then run the normal
+ * exception handler which will deliver the MC event to this kernel.
+ */
+ andi. r11,r12,MSR_PR /* See if coming from user. */
+ bne mce_deliver /* continue in V mode if we are. */
+
+ /*
+ * At this point we are coming from kernel context.
+ * Queue up the MCE event and return from the interrupt.
+ * But before that, check if this is an un-recoverable exception.
+ * If yes, then stay on emergency stack and panic.
+ */
+ andi. r11,r12,MSR_RI
+ beq unrecoverable_mce
+
+ /*
+ * Check if we have successfully handled/recovered from error, if not
+ * then stay on emergency stack and panic.
+ */
+ ld r3,RESULT(r1) /* Load result */
+ cmpdi r3,0 /* see if we handled MCE successfully */
+ beq unrecoverable_mce /* if !handled then panic */
+
+ /*
+ * Return from MC interrupt.
+ * Queue up the MCE event so that we can log it later, while
+ * returning from kernel or opal call.
+ */
+ bl machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ RFI_TO_KERNEL
+
+mce_deliver:
+ /*
+ * This is a host user or guest MCE. Restore all registers, then
+ * run the "late" handler. For host user, this will run the
+ * machine_check_exception handler in virtual mode like a normal
+ * interrupt handler. For guest, this will trigger the KVM test
+ * and branch to the KVM interrupt similarly to other interrupts.
+ */
+BEGIN_FTR_SECTION
+ ld r10,ORIG_GPR3(r1)
+ mtspr SPRN_CFAR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ MACHINE_CHECK_HANDLER_WINDUP
+ /* See comment at machine_check exception, don't turn on RI */
+ INT_HANDLER machine_check, 0x200, area=PACA_EXMC, ri=0, dar=1, dsisr=1, kvm=1
EXC_COMMON_BEGIN(machine_check_common)
/*
* Machine check is different because we use a different
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
- mfspr r10,SPRN_DAR
- std r10,PACA_EXMC+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXMC+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
+ INT_COMMON 0x200, PACA_EXMC, 1, 1, 1, 1, 1
FINISH_NAP
- RECONCILE_IRQ_STATE(r10, r11)
- ld r3,PACA_EXMC+EX_DAR(r13)
- lwz r4,PACA_EXMC+EX_DSISR(r13)
/* Enable MSR_RI when finished with PACA_EXMC */
li r10,MSR_RI
mtmsrd r10,1
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
b ret_from_except
-#define MACHINE_CHECK_HANDLER_WINDUP \
- /* Clear MSR_RI before setting SRR0 and SRR1. */\
- li r0,MSR_RI; \
- mfmsr r9; /* get MSR value */ \
- andc r9,r9,r0; \
- mtmsrd r9,1; /* Clear MSR_RI */ \
- /* Move original SRR0 and SRR1 into the respective regs */ \
- ld r9,_MSR(r1); \
- mtspr SPRN_SRR1,r9; \
- ld r3,_NIP(r1); \
- mtspr SPRN_SRR0,r3; \
- ld r9,_CTR(r1); \
- mtctr r9; \
- ld r9,_XER(r1); \
- mtxer r9; \
- ld r9,_LINK(r1); \
- mtlr r9; \
- REST_GPR(0, r1); \
- REST_8GPRS(2, r1); \
- REST_GPR(10, r1); \
- ld r11,_CCR(r1); \
- mtcr r11; \
- /* Decrement paca->in_mce. */ \
- lhz r12,PACA_IN_MCE(r13); \
- subi r12,r12,1; \
- sth r12,PACA_IN_MCE(r13); \
- REST_GPR(11, r1); \
- REST_2GPRS(12, r1); \
- /* restore original r1. */ \
- ld r1,GPR1(r1)
-
#ifdef CONFIG_PPC_P7_NAP
/*
* This is an idle wakeup. Low level machine check has already been
@@ -416,79 +1100,20 @@
* Then decrement MCE nesting after finishing with the stack.
*/
ld r3,_MSR(r1)
+ ld r4,_LINK(r1)
lhz r11,PACA_IN_MCE(r13)
subi r11,r11,1
sth r11,PACA_IN_MCE(r13)
- /* Turn off the RI bit because SRR1 is used by idle wakeup code. */
- /* Recoverability could be improved by reducing the use of SRR1. */
- li r11,0
- mtmsrd r11,1
-
- b pnv_powersave_wakeup_mce
-#endif
- /*
- * Handle machine check early in real mode. We come here with
- * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
- */
-EXC_COMMON_BEGIN(machine_check_handle_early)
- std r0,GPR0(r1) /* Save r0 */
- EXCEPTION_PROLOG_COMMON_3(0x200)
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_early
- std r3,RESULT(r1) /* Save result */
- ld r12,_MSR(r1)
-
-#ifdef CONFIG_PPC_P7_NAP
- /*
- * Check if thread was in power saving mode. We come here when any
- * of the following is true:
- * a. thread wasn't in power saving mode
- * b. thread was in power saving mode with no state loss,
- * supervisor state loss or hypervisor state loss.
- *
- * Go back to nap/sleep/winkle mode again if (b) is true.
- */
- BEGIN_FTR_SECTION
- rlwinm. r11,r12,47-31,30,31
- bne machine_check_idle_common
- END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ mtlr r4
+ rlwinm r10,r3,47-31,30,31
+ cmpwi cr1,r10,2
+ bltlr cr1 /* no state loss, return to idle caller */
+ b idle_return_gpr_loss
#endif
- /*
- * Check if we are coming from hypervisor userspace. If yes then we
- * continue in host kernel in V mode to deliver the MC event.
- */
- rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
- beq 5f
- andi. r11,r12,MSR_PR /* See if coming from user. */
- bne 9f /* continue in V mode if we are. */
-
-5:
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
- /*
- * We are coming from kernel context. Check if we are coming from
- * guest. if yes, then we can continue. We will fall through
- * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
- */
- lbz r11,HSTATE_IN_GUEST(r13)
- cmpwi r11,0 /* Check if coming from guest */
- bne 9f /* continue if we are. */
-#endif
- /*
- * At this point we are not sure about what context we come from.
- * Queue up the MCE event and return from the interrupt.
- * But before that, check if this is an un-recoverable exception.
- * If yes, then stay on emergency stack and panic.
- */
- andi. r11,r12,MSR_RI
- bne 2f
-1: mfspr r11,SPRN_SRR0
- LOAD_HANDLER(r10,unrecover_mce)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
+EXC_COMMON_BEGIN(unrecoverable_mce)
/*
* We are going down. But there are chances that we might get hit by
* another MCE during panic path and we may run into unstable state
@@ -496,69 +1121,49 @@
* when another MCE is hit during panic path, system will checkstop
* and hypervisor will get restarted cleanly by SP.
*/
+BEGIN_FTR_SECTION
+ li r10,0 /* clear MSR_RI */
+ mtmsrd r10,1
+ bl disable_machine_check
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+ ld r10,PACAKMSR(r13)
li r3,MSR_ME
- andc r10,r10,r3 /* Turn off MSR_ME */
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-2:
- /*
- * Check if we have successfully handled/recovered from error, if not
- * then stay on emergency stack and panic.
- */
- ld r3,RESULT(r1) /* Load result */
- cmpdi r3,0 /* see if we handled MCE successfully */
+ andc r10,r10,r3
+ mtmsrd r10
- beq 1b /* if !handled then panic */
- /*
- * Return from MC interrupt.
- * Queue up the MCE event so that we can log it later, while
- * returning from kernel or opal call.
- */
- bl machine_check_queue_event
- MACHINE_CHECK_HANDLER_WINDUP
- RFI_TO_USER_OR_KERNEL
-9:
- /* Deliver the machine check to host kernel in V mode. */
- MACHINE_CHECK_HANDLER_WINDUP
- b machine_check_pSeries
-
-EXC_COMMON_BEGIN(unrecover_mce)
/* Invoke machine_check_exception to print MCE event and panic. */
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
+
/*
- * We will not reach here. Even if we did, there is no way out. Call
- * unrecoverable_exception and die.
+ * We will not reach here. Even if we did, there is no way out.
+ * Call unrecoverable_exception and die.
*/
-1: addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl unrecoverable_exception
- b 1b
+ b .
-EXC_REAL(data_access, 0x300, 0x80)
-EXC_VIRT(data_access, 0x4300, 0x80, 0x300)
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x300)
-
+EXC_REAL_BEGIN(data_access, 0x300, 0x80)
+ INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1
+EXC_REAL_END(data_access, 0x300, 0x80)
+EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
+ INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1
+EXC_VIRT_END(data_access, 0x4300, 0x80)
+INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON_BEGIN(data_access_common)
/*
* Here r13 points to the paca, r9 contains the saved CR,
* SRR0 and SRR1 are saved in r11 and r12,
* r9 - r13 are saved in paca->exgen.
+ * EX_DAR and EX_DSISR have saved DAR/DSISR
*/
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- li r5,0x300
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
+ ld r6,_MSR(r1)
+ li r3,0x300
b do_hash_page /* Try to handle as hpte fault */
MMU_FTR_SECTION_ELSE
b handle_page_fault
@@ -566,43 +1171,51 @@
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
- mr r12,r3 /* save r3 */
- mfspr r3,SPRN_DAR
- mfspr r11,SPRN_SRR1
- crset 4*cr6+eq
- BRANCH_TO_COMMON(r10, slb_miss_common)
+ INT_HANDLER data_access_slb, 0x380, ool=1, area=PACA_EXSLB, dar=1, kvm=1
EXC_REAL_END(data_access_slb, 0x380, 0x80)
-
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
- mr r12,r3 /* save r3 */
- mfspr r3,SPRN_DAR
- mfspr r11,SPRN_SRR1
- crset 4*cr6+eq
- BRANCH_TO_COMMON(r10, slb_miss_common)
+ INT_HANDLER data_access_slb, 0x380, virt=1, area=PACA_EXSLB, dar=1
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
-TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
-
-
-EXC_REAL(instruction_access, 0x400, 0x80)
-EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
-TRAMP_KVM(PACA_EXGEN, 0x400)
-
-EXC_COMMON_BEGIN(instruction_access_common)
- EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,_NIP(r1)
- andis. r4,r12,DSISR_SRR1_MATCH_64S@h
- li r5,0x400
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+INT_KVM_HANDLER data_access_slb, 0x380, EXC_STD, PACA_EXSLB, 1
+EXC_COMMON_BEGIN(data_access_slb_common)
+ INT_COMMON 0x380, PACA_EXSLB, 1, 1, 0, 1, 0
+ ld r4,_DAR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
+ /* HPT case, do SLB fault */
+ bl do_slb_fault
+ cmpdi r3,0
+ bne- 1f
+ b fast_exception_return
+1: /* Error case */
+MMU_FTR_SECTION_ELSE
+ /* Radix case, access is outside page table range */
+ li r3,-EFAULT
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+ std r3,RESULT(r1)
+ bl save_nvgprs
+ RECONCILE_IRQ_STATE(r10, r11)
+ ld r4,_DAR(r1)
+ ld r5,RESULT(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_bad_slb_fault
+ b ret_from_except
+
+
+EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)
+ INT_HANDLER instruction_access, 0x400, kvm=1
+EXC_REAL_END(instruction_access, 0x400, 0x80)
+EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
+ INT_HANDLER instruction_access, 0x400, virt=1
+EXC_VIRT_END(instruction_access, 0x4400, 0x80)
+INT_KVM_HANDLER instruction_access, 0x400, EXC_STD, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(instruction_access_common)
+ INT_COMMON 0x400, PACA_EXGEN, 1, 1, 1, 2, 2
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
+BEGIN_MMU_FTR_SECTION
+ ld r6,_MSR(r1)
+ li r3,0x400
b do_hash_page /* Try to handle as hpte fault */
MMU_FTR_SECTION_ELSE
b handle_page_fault
@@ -610,210 +1223,68 @@
EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
- mr r12,r3 /* save r3 */
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r11,SPRN_SRR1
- crclr 4*cr6+eq
- BRANCH_TO_COMMON(r10, slb_miss_common)
+ INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1
EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
-
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
- mr r12,r3 /* save r3 */
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r11,SPRN_SRR1
- crclr 4*cr6+eq
- BRANCH_TO_COMMON(r10, slb_miss_common)
+ INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
-TRAMP_KVM(PACA_EXSLB, 0x480)
-
-
-/*
- * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as
- * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled.
- */
-EXC_COMMON_BEGIN(slb_miss_common)
- /*
- * r13 points to the PACA, r9 contains the saved CR,
- * r12 contains the saved r3,
- * r11 contain the saved SRR1, SRR0 is still ready for return
- * r3 has the faulting address
- * r9 - r13 are saved in paca->exslb.
- * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss
- * We assume we aren't going to take any exceptions during this
- * procedure.
- */
- mflr r10
- stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
- std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
-
- andi. r9,r11,MSR_PR // Check for exception from userspace
- cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later
-
- /*
- * Test MSR_RI before calling slb_allocate_realmode, because the
- * MSR in r11 gets clobbered. However we still want to allocate
- * SLB in case MSR_RI=0, to minimise the risk of getting stuck in
- * recursive SLB faults. So use cr5 for this, which is preserved.
- */
- andi. r11,r11,MSR_RI /* check for unrecoverable exception */
- cmpdi cr5,r11,MSR_RI
-
- crset 4*cr0+eq
-#ifdef CONFIG_PPC_BOOK3S_64
+INT_KVM_HANDLER instruction_access_slb, 0x480, EXC_STD, PACA_EXSLB, 0
+EXC_COMMON_BEGIN(instruction_access_slb_common)
+ INT_COMMON 0x480, PACA_EXSLB, 1, 1, 0, 2, 0
+ ld r4,_DAR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
- bl slb_allocate
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
-#endif
-
- ld r10,PACA_EXSLB+EX_LR(r13)
- lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
- mtlr r10
-
- /*
- * Large address, check whether we have to allocate new contexts.
- */
- beq- 8f
-
- bne- cr5,2f /* if unrecoverable exception, oops */
-
- /* All done -- return from exception. */
-
- bne cr4,1f /* returning to kernel */
-
- mtcrf 0x80,r9
- mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
- mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
- mtcrf 0x02,r9 /* I/D indication is in cr6 */
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-
- RESTORE_CTR(r9, PACA_EXSLB)
- RESTORE_PPR_PACA(PACA_EXSLB, r9)
- mr r3,r12
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- RFI_TO_USER
- b . /* prevent speculative execution */
-1:
- mtcrf 0x80,r9
- mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
- mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
- mtcrf 0x02,r9 /* I/D indication is in cr6 */
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-
- RESTORE_CTR(r9, PACA_EXSLB)
- RESTORE_PPR_PACA(PACA_EXSLB, r9)
- mr r3,r12
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
-
-2: std r3,PACA_EXSLB+EX_DAR(r13)
- mr r3,r12
- mfspr r11,SPRN_SRR0
- mfspr r12,SPRN_SRR1
- LOAD_HANDLER(r10,unrecov_slb)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-
-8: std r3,PACA_EXSLB+EX_DAR(r13)
- mr r3,r12
- mfspr r11,SPRN_SRR0
- mfspr r12,SPRN_SRR1
- LOAD_HANDLER(r10, large_addr_slb)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-
-EXC_COMMON_BEGIN(unrecov_slb)
- EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
- RECONCILE_IRQ_STATE(r10, r11)
+ /* HPT case, do SLB fault */
+ bl do_slb_fault
+ cmpdi r3,0
+ bne- 1f
+ b fast_exception_return
+1: /* Error case */
+MMU_FTR_SECTION_ELSE
+ /* Radix case, access is outside page table range */
+ li r3,-EFAULT
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+ std r3,RESULT(r1)
bl save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
-
-EXC_COMMON_BEGIN(large_addr_slb)
- EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
RECONCILE_IRQ_STATE(r10, r11)
- ld r3, PACA_EXSLB+EX_DAR(r13)
- std r3, _DAR(r1)
- beq cr6, 2f
- li r10, 0x481 /* fix trap number for I-SLB miss */
- std r10, _TRAP(r1)
-2: bl save_nvgprs
- addi r3, r1, STACK_FRAME_OVERHEAD
- bl slb_miss_large_addr
+ ld r4,_DAR(r1)
+ ld r5,RESULT(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_bad_slb_fault
b ret_from_except
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
- .globl hardware_interrupt_hv;
-hardware_interrupt_hv:
- BEGIN_FTR_SECTION
- MASKABLE_EXCEPTION_HV(0x500, hardware_interrupt_common, IRQS_DISABLED)
- FTR_SECTION_ELSE
- MASKABLE_EXCEPTION(0x500, hardware_interrupt_common, IRQS_DISABLED)
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ INT_HANDLER hardware_interrupt, 0x500, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
-
EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
- .globl hardware_interrupt_relon_hv;
-hardware_interrupt_relon_hv:
- BEGIN_FTR_SECTION
- MASKABLE_RELON_EXCEPTION_HV(0x500, hardware_interrupt_common,
- IRQS_DISABLED)
- FTR_SECTION_ELSE
- __MASKABLE_RELON_EXCEPTION(0x500, hardware_interrupt_common,
- EXC_STD, SOFTEN_TEST_PR, IRQS_DISABLED)
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+ INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
-
-TRAMP_KVM(PACA_EXGEN, 0x500)
-TRAMP_KVM_HV(PACA_EXGEN, 0x500)
+INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
-EXC_REAL(alignment, 0x600, 0x100)
-EXC_VIRT(alignment, 0x4600, 0x100, 0x600)
-TRAMP_KVM(PACA_EXGEN, 0x600)
+EXC_REAL_BEGIN(alignment, 0x600, 0x100)
+ INT_HANDLER alignment, 0x600, dar=1, dsisr=1, kvm=1
+EXC_REAL_END(alignment, 0x600, 0x100)
+EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
+ INT_HANDLER alignment, 0x600, virt=1, dar=1, dsisr=1
+EXC_VIRT_END(alignment, 0x4600, 0x100)
+INT_KVM_HANDLER alignment, 0x600, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(alignment_common)
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x600, PACA_EXGEN, 1, 1, 1, 1, 1
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl alignment_exception
b ret_from_except
-EXC_REAL(program_check, 0x700, 0x100)
-EXC_VIRT(program_check, 0x4700, 0x100, 0x700)
-TRAMP_KVM(PACA_EXGEN, 0x700)
+EXC_REAL_BEGIN(program_check, 0x700, 0x100)
+ INT_HANDLER program_check, 0x700, kvm=1
+EXC_REAL_END(program_check, 0x700, 0x100)
+EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)
+ INT_HANDLER program_check, 0x700, virt=1
+EXC_VIRT_END(program_check, 0x4700, 0x100)
+INT_KVM_HANDLER program_check, 0x700, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(program_check_common)
/*
* It's possible to receive a TM Bad Thing type program check with
@@ -823,39 +1294,49 @@
* we switch to the emergency stack if we're taking a TM Bad Thing from
* the kernel.
*/
- li r10,MSR_PR /* Build a mask of MSR_PR .. */
- oris r10,r10,0x200000@h /* .. and SRR1_PROGTM */
- and r10,r10,r12 /* Mask SRR1 with that. */
- srdi r10,r10,8 /* Shift it so we can compare */
- cmpldi r10,(0x200000 >> 8) /* .. with an immediate. */
- bne 1f /* If != go to normal path. */
- /* SRR1 had PR=0 and SRR1_PROGTM=1, so use the emergency stack */
- andi. r10,r12,MSR_PR; /* Set CR0 correctly for label */
+ andi. r10,r12,MSR_PR
+ bne 2f /* If userspace, go normal path */
+
+ andis. r10,r12,(SRR1_PROGTM)@h
+ bne 1f /* If TM, emergency */
+
+ cmpdi r1,-INT_FRAME_SIZE /* check if r1 is in userspace */
+ blt 2f /* normal path if not */
+
+ /* Use the emergency stack */
+1: andi. r10,r12,MSR_PR /* Set CR0 correctly for label */
/* 3 in EXCEPTION_PROLOG_COMMON */
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- b 3f /* Jump into the macro !! */
-1: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
+ INT_COMMON 0x700, PACA_EXGEN, 0, 1, 1, 0, 0
+ b 3f
+2:
+ INT_COMMON 0x700, PACA_EXGEN, 1, 1, 1, 0, 0
+3:
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl program_check_exception
b ret_from_except
-EXC_REAL(fp_unavailable, 0x800, 0x100)
-EXC_VIRT(fp_unavailable, 0x4800, 0x100, 0x800)
-TRAMP_KVM(PACA_EXGEN, 0x800)
+EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
+ INT_HANDLER fp_unavailable, 0x800, kvm=1
+EXC_REAL_END(fp_unavailable, 0x800, 0x100)
+EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100)
+ INT_HANDLER fp_unavailable, 0x800, virt=1
+EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
+INT_KVM_HANDLER fp_unavailable, 0x800, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(fp_unavailable_common)
- EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
+ INT_COMMON 0x800, PACA_EXGEN, 1, 1, 0, 0, 0
bne 1f /* if from user, just load it up */
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_fp_unavailable_exception
- BUG_OPCODE
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
1:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
@@ -878,21 +1359,33 @@
#endif
-EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
-EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0x900)
+EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
+ INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(decrementer, 0x900, 0x80)
+EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
+ INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED
+EXC_VIRT_END(decrementer, 0x4900, 0x80)
+INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
-EXC_REAL_HV(hdecrementer, 0x980, 0x80)
-EXC_VIRT_HV(hdecrementer, 0x4980, 0x80, 0x980)
-TRAMP_KVM_HV(PACA_EXGEN, 0x980)
+EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
+ INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(hdecrementer, 0x980, 0x80)
+EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
+ INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
+INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
-EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED)
-EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0xa00)
+EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
+ INT_HANDLER doorbell_super, 0xa00, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(doorbell_super, 0xa00, 0x100)
+EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
+ INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED
+EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
+INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
#else
@@ -900,17 +1393,13 @@
#endif
-EXC_REAL(trap_0b, 0xb00, 0x100)
-EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00)
-TRAMP_KVM(PACA_EXGEN, 0xb00)
-EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
+EXC_REAL_NONE(0xb00, 0x100)
+EXC_VIRT_NONE(0x4b00, 0x100)
/*
* system call / hypercall (0xc00, 0x4c00)
*
* The system call exception is invoked with "sc 0" and does not alter HV bit.
- * There is support for kernel code to invoke system calls but there are no
- * in-tree users.
*
* The hypercall is invoked with "sc 1" and sets HV=1.
*
@@ -919,7 +1408,7 @@
*
* Call convention:
*
- * syscall register convention is in Documentation/powerpc/syscall64-abi.txt
+ * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
*
* For hypercalls, the register convention is as follows:
* r0 volatile
@@ -941,6 +1430,7 @@
* without saving, though xer is not a good idea to use, as hardware may
* interpret some bits so it may be costly to change them.
*/
+.macro SYSTEM_CALL virt
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
* There is a little bit of juggling to get syscall and hcall
@@ -950,95 +1440,66 @@
* Userspace syscalls have already saved the PPR, hcalls must save
* it before setting HMT_MEDIUM.
*/
-#define SYSCALL_KVMTEST \
- mtctr r13; \
- GET_PACA(r13); \
- std r10,PACA_EXGEN+EX_R10(r13); \
- INTERRUPT_TO_KERNEL; \
- KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
- HMT_MEDIUM; \
- mfctr r9;
-
+ mtctr r13
+ GET_PACA(r13)
+ std r10,PACA_EXGEN+EX_R10(r13)
+ INTERRUPT_TO_KERNEL
+ KVMTEST system_call EXC_STD 0xc00 /* uses r10, branch to system_call_kvm */
+ mfctr r9
#else
-#define SYSCALL_KVMTEST \
- HMT_MEDIUM; \
- mr r9,r13; \
- GET_PACA(r13); \
- INTERRUPT_TO_KERNEL;
+ mr r9,r13
+ GET_PACA(r13)
+ INTERRUPT_TO_KERNEL
#endif
-
-#define LOAD_SYSCALL_HANDLER(reg) \
- __LOAD_HANDLER(reg, system_call_common)
-
-/*
- * After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9,
- * and HMT_MEDIUM.
- */
-#define SYSCALL_REAL \
- mfspr r11,SPRN_SRR0 ; \
- mfspr r12,SPRN_SRR1 ; \
- LOAD_SYSCALL_HANDLER(r10) ; \
- mtspr SPRN_SRR0,r10 ; \
- ld r10,PACAKMSR(r13) ; \
- mtspr SPRN_SRR1,r10 ; \
- RFI_TO_KERNEL ; \
- b . ; /* prevent speculative execution */
#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
-#define SYSCALL_FASTENDIAN_TEST \
-BEGIN_FTR_SECTION \
- cmpdi r0,0x1ebe ; \
- beq- 1f ; \
-END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
-
-#define SYSCALL_FASTENDIAN \
- /* Fast LE/BE switch system call */ \
-1: mfspr r12,SPRN_SRR1 ; \
- xori r12,r12,MSR_LE ; \
- mtspr SPRN_SRR1,r12 ; \
- mr r13,r9 ; \
- RFI_TO_USER ; /* return to userspace */ \
- b . ; /* prevent speculative execution */
-#else
-#define SYSCALL_FASTENDIAN_TEST
-#define SYSCALL_FASTENDIAN
-#endif /* CONFIG_PPC_FAST_ENDIAN_SWITCH */
-
-#if defined(CONFIG_RELOCATABLE)
- /*
- * We can't branch directly so we do it via the CTR which
- * is volatile across system calls.
- */
-#define SYSCALL_VIRT \
- LOAD_SYSCALL_HANDLER(r10) ; \
- mtctr r10 ; \
- mfspr r11,SPRN_SRR0 ; \
- mfspr r12,SPRN_SRR1 ; \
- li r10,MSR_RI ; \
- mtmsrd r10,1 ; \
- bctr ;
-#else
- /* We can branch directly */
-#define SYSCALL_VIRT \
- mfspr r11,SPRN_SRR0 ; \
- mfspr r12,SPRN_SRR1 ; \
- li r10,MSR_RI ; \
- mtmsrd r10,1 ; /* Set RI (EE=0) */ \
- b system_call_common ;
+BEGIN_FTR_SECTION
+ cmpdi r0,0x1ebe
+ beq- 1f
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
#endif
-EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
- SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */
- SYSCALL_FASTENDIAN_TEST
- SYSCALL_REAL
- SYSCALL_FASTENDIAN
-EXC_REAL_END(system_call, 0xc00, 0x100)
+ /* We reach here with PACA in r13, r13 in r9. */
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+ HMT_MEDIUM
+
+ .if ! \virt
+ __LOAD_HANDLER(r10, system_call_common)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ mtspr SPRN_SRR1,r10
+ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+ .else
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set RI (EE=0) */
+#ifdef CONFIG_RELOCATABLE
+ __LOAD_HANDLER(r10, system_call_common)
+ mtctr r10
+ bctr
+#else
+ b system_call_common
+#endif
+ .endif
+
+#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
+ /* Fast LE/BE switch system call */
+1: mfspr r12,SPRN_SRR1
+ xori r12,r12,MSR_LE
+ mtspr SPRN_SRR1,r12
+ mr r13,r9
+ RFI_TO_USER /* return to userspace */
+ b . /* prevent speculative execution */
+#endif
+.endm
+
+EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
+ SYSTEM_CALL 0
+EXC_REAL_END(system_call, 0xc00, 0x100)
EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
- SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */
- SYSCALL_FASTENDIAN_TEST
- SYSCALL_VIRT
- SYSCALL_FASTENDIAN
+ SYSTEM_CALL 1
EXC_VIRT_END(system_call, 0x4c00, 0x100)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
@@ -1049,7 +1510,7 @@
* ctr = orig r13
* orig r10 saved in PACA
*/
-TRAMP_KVM_BEGIN(do_kvm_0xc00)
+TRAMP_KVM_BEGIN(system_call_kvm)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
@@ -1062,40 +1523,58 @@
SET_SCRATCH0(r10)
std r9,PACA_EXGEN+EX_R9(r13)
mfcr r9
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
+ KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0
#endif
-EXC_REAL(single_step, 0xd00, 0x100)
-EXC_VIRT(single_step, 0x4d00, 0x100, 0xd00)
-TRAMP_KVM(PACA_EXGEN, 0xd00)
+EXC_REAL_BEGIN(single_step, 0xd00, 0x100)
+ INT_HANDLER single_step, 0xd00, kvm=1
+EXC_REAL_END(single_step, 0xd00, 0x100)
+EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
+ INT_HANDLER single_step, 0xd00, virt=1
+EXC_VIRT_END(single_step, 0x4d00, 0x100)
+INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0
EXC_COMMON(single_step_common, 0xd00, single_step_exception)
-EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0x20)
-EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x20, 0xe00)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00)
+
+EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20)
+ INT_HANDLER h_data_storage, 0xe00, ool=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+EXC_REAL_END(h_data_storage, 0xe00, 0x20)
+EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
+ INT_HANDLER h_data_storage, 0xe00, ool=1, virt=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
+INT_KVM_HANDLER h_data_storage, 0xe00, EXC_HV, PACA_EXGEN, 1
EXC_COMMON_BEGIN(h_data_storage_common)
- mfspr r10,SPRN_HDAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_HDSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
+ INT_COMMON 0xe00, PACA_EXGEN, 1, 1, 1, 1, 1
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
+BEGIN_MMU_FTR_SECTION
+ ld r4,_DAR(r1)
+ li r5,SIGSEGV
+ bl bad_page_fault
+MMU_FTR_SECTION_ELSE
bl unknown_exception
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
b ret_from_except
-EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20)
-EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x20, 0xe20)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe20)
+EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20)
+ INT_HANDLER h_instr_storage, 0xe20, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(h_instr_storage, 0xe20, 0x20)
+EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
+ INT_HANDLER h_instr_storage, 0xe20, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
+INT_KVM_HANDLER h_instr_storage, 0xe20, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
-EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0x20)
-EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x20, 0xe40)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe40)
+EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20)
+ INT_HANDLER emulation_assist, 0xe40, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(emulation_assist, 0xe40, 0x20)
+EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
+ INT_HANDLER emulation_assist, 0xe40, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
+INT_KVM_HANDLER emulation_assist, 0xe40, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
@@ -1104,68 +1583,55 @@
* first, and then eventaully from there to the trampoline to get into virtual
* mode.
*/
-__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early)
-__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60, IRQS_DISABLED)
+EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20)
+ INT_HANDLER hmi_exception, 0xe60, ool=1, early=1, hsrr=EXC_HV, ri=0, kvm=1
+EXC_REAL_END(hmi_exception, 0xe60, 0x20)
EXC_VIRT_NONE(0x4e60, 0x20)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
-TRAMP_REAL_BEGIN(hmi_exception_early)
- EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60)
+INT_KVM_HANDLER hmi_exception, 0xe60, EXC_HV, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(hmi_exception_early_common)
+ mtctr r10 /* Restore ctr */
+ mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
- EXCEPTION_PROLOG_COMMON_1()
- EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
- EXCEPTION_PROLOG_COMMON_3(0xe60)
- addi r3,r1,STACK_FRAME_OVERHEAD
- BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */
- cmpdi cr0,r3,0
- /* Windup the stack. */
- /* Move original HSRR0 and HSRR1 into the respective regs */
- ld r9,_MSR(r1)
- mtspr SPRN_HSRR1,r9
- ld r3,_NIP(r1)
- mtspr SPRN_HSRR0,r3
- ld r9,_CTR(r1)
- mtctr r9
- ld r9,_XER(r1)
- mtxer r9
- ld r9,_LINK(r1)
- mtlr r9
- REST_GPR(0, r1)
- REST_8GPRS(2, r1)
- REST_GPR(10, r1)
- ld r11,_CCR(r1)
- REST_2GPRS(12, r1)
+ /* We don't touch AMR here, we never go to virtual mode */
+ INT_COMMON 0xe60, PACA_EXGEN, 0, 0, 0, 0, 0
+
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl hmi_exception_realmode
+ cmpdi cr0,r3,0
bne 1f
- mtcr r11
- REST_GPR(11, r1)
- ld r1,GPR1(r1)
+
+ EXCEPTION_RESTORE_REGS EXC_HV
HRFI_TO_USER_OR_KERNEL
-1: mtcr r11
- REST_GPR(11, r1)
- ld r1,GPR1(r1)
-
+1:
/*
* Go to virtual mode and pull the HMI event information from
* firmware.
*/
- .globl hmi_exception_after_realmode
-hmi_exception_after_realmode:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b tramp_real_hmi_exception
+ EXCEPTION_RESTORE_REGS EXC_HV
+ INT_HANDLER hmi_exception, 0xe60, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
EXC_COMMON_BEGIN(hmi_exception_common)
-EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
- ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
+ INT_COMMON 0xe60, PACA_EXGEN, 1, 1, 1, 0, 0
+ FINISH_NAP
+ RUNLATCH_ON
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl handle_hmi_exception
+ b ret_from_except
-EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED)
-EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
+
+EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
+ INT_HANDLER h_doorbell, 0xe80, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(h_doorbell, 0xe80, 0x20)
+EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
+ INT_HANDLER h_doorbell, 0xe80, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
+INT_KVM_HANDLER h_doorbell, 0xe80, EXC_HV, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
#else
@@ -1173,9 +1639,13 @@
#endif
-EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED)
-EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED)
-TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
+EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
+ INT_HANDLER h_virt_irq, 0xea0, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(h_virt_irq, 0xea0, 0x20)
+EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
+ INT_HANDLER h_virt_irq, 0xea0, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
+INT_KVM_HANDLER h_virt_irq, 0xea0, EXC_HV, PACA_EXGEN, 0
EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
@@ -1185,17 +1655,25 @@
EXC_VIRT_NONE(0x4ee0, 0x20)
-EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED)
-EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0xf00)
+EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
+ INT_HANDLER performance_monitor, 0xf00, ool=1, bitmask=IRQS_PMI_DISABLED, kvm=1
+EXC_REAL_END(performance_monitor, 0xf00, 0x20)
+EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
+ INT_HANDLER performance_monitor, 0xf00, ool=1, virt=1, bitmask=IRQS_PMI_DISABLED
+EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
+INT_KVM_HANDLER performance_monitor, 0xf00, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
-EXC_REAL_OOL(altivec_unavailable, 0xf20, 0x20)
-EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x20, 0xf20)
-TRAMP_KVM(PACA_EXGEN, 0xf20)
+EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
+ INT_HANDLER altivec_unavailable, 0xf20, ool=1, kvm=1
+EXC_REAL_END(altivec_unavailable, 0xf20, 0x20)
+EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20)
+ INT_HANDLER altivec_unavailable, 0xf20, ool=1, virt=1
+EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20)
+INT_KVM_HANDLER altivec_unavailable, 0xf20, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(altivec_unavailable_common)
- EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
+ INT_COMMON 0xf20, PACA_EXGEN, 1, 1, 0, 0, 0
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
beq 1f
@@ -1228,11 +1706,15 @@
b ret_from_except
-EXC_REAL_OOL(vsx_unavailable, 0xf40, 0x20)
-EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x20, 0xf40)
-TRAMP_KVM(PACA_EXGEN, 0xf40)
+EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
+ INT_HANDLER vsx_unavailable, 0xf40, ool=1, kvm=1
+EXC_REAL_END(vsx_unavailable, 0xf40, 0x20)
+EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20)
+ INT_HANDLER vsx_unavailable, 0xf40, ool=1, virt=1
+EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20)
+INT_KVM_HANDLER vsx_unavailable, 0xf40, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(vsx_unavailable_common)
- EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
+ INT_COMMON 0xf40, PACA_EXGEN, 1, 1, 0, 0, 0
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
beq 1f
@@ -1264,15 +1746,23 @@
b ret_from_except
-EXC_REAL_OOL(facility_unavailable, 0xf60, 0x20)
-EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x20, 0xf60)
-TRAMP_KVM(PACA_EXGEN, 0xf60)
+EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20)
+ INT_HANDLER facility_unavailable, 0xf60, ool=1, kvm=1
+EXC_REAL_END(facility_unavailable, 0xf60, 0x20)
+EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
+ INT_HANDLER facility_unavailable, 0xf60, ool=1, virt=1
+EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
+INT_KVM_HANDLER facility_unavailable, 0xf60, EXC_STD, PACA_EXGEN, 0
EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception)
-EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0x20)
-EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x20, 0xf80)
-TRAMP_KVM_HV(PACA_EXGEN, 0xf80)
+EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20)
+ INT_HANDLER h_facility_unavailable, 0xf80, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20)
+EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
+ INT_HANDLER h_facility_unavailable, 0xf80, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
+INT_KVM_HANDLER h_facility_unavailable, 0xf80, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception)
@@ -1289,9 +1779,11 @@
EXC_VIRT_NONE(0x5100, 0x100)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_system_error, 0x1200, 0x100)
+EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
+ INT_HANDLER cbe_system_error, 0x1200, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200)
+INT_KVM_HANDLER cbe_system_error, 0x1200, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
@@ -1299,38 +1791,43 @@
#endif
-EXC_REAL(instruction_breakpoint, 0x1300, 0x100)
-EXC_VIRT(instruction_breakpoint, 0x5300, 0x100, 0x1300)
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300)
+EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100)
+ INT_HANDLER instruction_breakpoint, 0x1300, kvm=1
+EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100)
+EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
+ INT_HANDLER instruction_breakpoint, 0x1300, virt=1
+EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
+INT_KVM_HANDLER instruction_breakpoint, 0x1300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception)
+
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
- mtspr SPRN_SPRG_HSCRATCH0,r13
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500)
-
+ INT_HANDLER denorm_exception_hv, 0x1500, early=2, hsrr=EXC_HV
#ifdef CONFIG_PPC_DENORMALISATION
mfspr r10,SPRN_HSRR1
andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
#endif
-
- KVMTEST_HV(0x1500)
- EXCEPTION_PROLOG_2(denorm_common, EXC_HV)
+ KVMTEST denorm_exception_hv, EXC_HV 0x1500
+ INT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV, 1
EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
#ifdef CONFIG_PPC_DENORMALISATION
EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100)
- b exc_real_0x1500_denorm_exception_hv
+ INT_HANDLER denorm_exception, 0x1500, 0, 2, 1, EXC_HV, PACA_EXGEN, 1, 0, 0, 0, 0
+ mfspr r10,SPRN_HSRR1
+ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ bne+ denorm_assist
+ INT_VIRT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV
EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
#else
EXC_VIRT_NONE(0x5500, 0x100)
#endif
-TRAMP_KVM_HV(PACA_EXGEN, 0x1500)
+INT_KVM_HANDLER denorm_exception_hv, 0x1500, EXC_HV, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DENORMALISATION
TRAMP_REAL_BEGIN(denorm_assist)
@@ -1345,12 +1842,11 @@
mtmsrd r10
sync
-#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1
-#define FMR4(n) FMR2(n) ; FMR2(n+2)
-#define FMR8(n) FMR4(n) ; FMR4(n+4)
-#define FMR16(n) FMR8(n) ; FMR8(n+8)
-#define FMR32(n) FMR16(n) ; FMR16(n+16)
- FMR32(0)
+ .Lreg=0
+ .rept 32
+ fmr .Lreg,.Lreg
+ .Lreg=.Lreg+1
+ .endr
FTR_SECTION_ELSE
/*
@@ -1362,12 +1858,11 @@
mtmsrd r10
sync
-#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1)
-#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2)
-#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4)
-#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8)
-#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16)
- XVCPSGNDP32(0)
+ .Lreg=0
+ .rept 32
+ XVCPSGNDP(.Lreg,.Lreg,.Lreg)
+ .Lreg=.Lreg+1
+ .endr
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
@@ -1378,7 +1873,12 @@
* To denormalise we need to move a copy of the register to itself.
* For POWER8 we need to do that for all 64 VSX registers
*/
- XVCPSGNDP32(32)
+ .Lreg=32
+ .rept 32
+ XVCPSGNDP(.Lreg,.Lreg,.Lreg)
+ .Lreg=.Lreg+1
+ .endr
+
denorm_done:
mfspr r11,SPRN_HSRR0
subi r11,r11,4
@@ -1402,9 +1902,11 @@
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_maintenance, 0x1600, 0x100)
+EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
+ INT_HANDLER cbe_maintenance, 0x1600, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600)
+INT_KVM_HANDLER cbe_maintenance, 0x1600, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
@@ -1412,9 +1914,13 @@
#endif
-EXC_REAL(altivec_assist, 0x1700, 0x100)
-EXC_VIRT(altivec_assist, 0x5700, 0x100, 0x1700)
-TRAMP_KVM(PACA_EXGEN, 0x1700)
+EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100)
+ INT_HANDLER altivec_assist, 0x1700, kvm=1
+EXC_REAL_END(altivec_assist, 0x1700, 0x100)
+EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
+ INT_HANDLER altivec_assist, 0x1700, virt=1
+EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
+INT_KVM_HANDLER altivec_assist, 0x1700, EXC_STD, PACA_EXGEN, 0
#ifdef CONFIG_ALTIVEC
EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception)
#else
@@ -1423,15 +1929,18 @@
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_thermal, 0x1800, 0x100)
+EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
+ INT_HANDLER cbe_thermal, 0x1800, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800)
+INT_KVM_HANDLER cbe_thermal, 0x1800, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
#endif
+
#ifdef CONFIG_PPC_WATCHDOG
#define MASKED_DEC_HANDLER_LABEL 3f
@@ -1441,7 +1950,7 @@
std r12,PACA_EXGEN+EX_R12(r13); \
GET_SCRATCH0(r10); \
std r10,PACA_EXGEN+EX_R13(r13); \
- EXCEPTION_PROLOG_2(soft_nmi_common, _H)
+ INT_SAVE_SRR_AND_JUMP soft_nmi_common, _H, 1
/*
* Branch to soft_nmi_interrupt using the emergency stack. The emergency
@@ -1456,9 +1965,10 @@
mr r10,r1
ld r1,PACAEMERGSP(r13)
subi r1,r1,INT_FRAME_SIZE
- EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900,
- system_reset, soft_nmi_interrupt,
- ADD_NVGPRS;ADD_RECONCILE)
+ INT_COMMON 0x900, PACA_EXGEN, 0, 1, 1, 0, 0
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl soft_nmi_interrupt
b ret_from_except
#else /* CONFIG_PPC_WATCHDOG */
@@ -1476,35 +1986,50 @@
* - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
* This is called with r10 containing the value to OR to the paca field.
*/
-#define MASKED_INTERRUPT(_H) \
-masked_##_H##interrupt: \
- std r11,PACA_EXGEN+EX_R11(r13); \
- lbz r11,PACAIRQHAPPENED(r13); \
- or r11,r11,r10; \
- stb r11,PACAIRQHAPPENED(r13); \
- cmpwi r10,PACA_IRQ_DEC; \
- bne 1f; \
- lis r10,0x7fff; \
- ori r10,r10,0xffff; \
- mtspr SPRN_DEC,r10; \
- b MASKED_DEC_HANDLER_LABEL; \
-1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \
- beq 2f; \
- mfspr r10,SPRN_##_H##SRR1; \
- xori r10,r10,MSR_EE; /* clear MSR_EE */ \
- mtspr SPRN_##_H##SRR1,r10; \
- ori r11,r11,PACA_IRQ_HARD_DIS; \
- stb r11,PACAIRQHAPPENED(r13); \
-2: /* done */ \
- mtcrf 0x80,r9; \
- std r1,PACAR1(r13); \
- ld r9,PACA_EXGEN+EX_R9(r13); \
- ld r10,PACA_EXGEN+EX_R10(r13); \
- ld r11,PACA_EXGEN+EX_R11(r13); \
- /* returns to kernel where r13 must be set up, so don't restore it */ \
- ##_H##RFI_TO_KERNEL; \
- b .; \
- MASKED_DEC_HANDLER(_H)
+.macro MASKED_INTERRUPT hsrr
+ .if \hsrr
+masked_Hinterrupt:
+ .else
+masked_interrupt:
+ .endif
+ std r11,PACA_EXGEN+EX_R11(r13)
+ lbz r11,PACAIRQHAPPENED(r13)
+ or r11,r11,r10
+ stb r11,PACAIRQHAPPENED(r13)
+ cmpwi r10,PACA_IRQ_DEC
+ bne 1f
+ lis r10,0x7fff
+ ori r10,r10,0xffff
+ mtspr SPRN_DEC,r10
+ b MASKED_DEC_HANDLER_LABEL
+1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK
+ beq 2f
+ .if \hsrr
+ mfspr r10,SPRN_HSRR1
+ xori r10,r10,MSR_EE /* clear MSR_EE */
+ mtspr SPRN_HSRR1,r10
+ .else
+ mfspr r10,SPRN_SRR1
+ xori r10,r10,MSR_EE /* clear MSR_EE */
+ mtspr SPRN_SRR1,r10
+ .endif
+ ori r11,r11,PACA_IRQ_HARD_DIS
+ stb r11,PACAIRQHAPPENED(r13)
+2: /* done */
+ mtcrf 0x80,r9
+ std r1,PACAR1(r13)
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ /* returns to kernel where r13 must be set up, so don't restore it */
+ .if \hsrr
+ HRFI_TO_KERNEL
+ .else
+ RFI_TO_KERNEL
+ .endif
+ b .
+ MASKED_DEC_HANDLER(\hsrr\())
+.endm
TRAMP_REAL_BEGIN(stf_barrier_fallback)
std r9,PACA_EXRFI+EX_R9(r13)
@@ -1611,8 +2136,8 @@
* cannot reach these if they are put there.
*/
USE_FIXED_SECTION(virt_trampolines)
- MASKED_INTERRUPT()
- MASKED_INTERRUPT(H)
+ MASKED_INTERRUPT EXC_STD
+ MASKED_INTERRUPT EXC_HV
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
@@ -1698,6 +2223,35 @@
USE_TEXT_SECTION()
+/* MSR[RI] should be clear because this uses SRR[01] */
+enable_machine_check:
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r3
+ addi r3,r3,(1f - 0b)
+ mtspr SPRN_SRR0,r3
+ mfmsr r3
+ ori r3,r3,MSR_ME
+ mtspr SPRN_SRR1,r3
+ RFI_TO_KERNEL
+1: mtlr r0
+ blr
+
+/* MSR[RI] should be clear because this uses SRR[01] */
+disable_machine_check:
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r3
+ addi r3,r3,(1f - 0b)
+ mtspr SPRN_SRR0,r3
+ mfmsr r3
+ li r4,MSR_ME
+ andc r3,r3,r4
+ mtspr SPRN_SRR1,r3
+ RFI_TO_KERNEL
+1: mtlr r0
+ blr
+
/*
* Hash table stuff
*/
@@ -1706,23 +2260,21 @@
#ifdef CONFIG_PPC_BOOK3S_64
lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
ori r0,r0,DSISR_BAD_FAULT_64S@l
- and. r0,r4,r0 /* weird error? */
+ and. r0,r5,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
- CURRENT_THREAD_INFO(r11, r1)
+ ld r11, PACA_THREAD_INFO(r13)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
bne 77f /* then don't call hash_page now */
/*
- * r3 contains the faulting address
- * r4 msr
- * r5 contains the trap number
- * r6 contains dsisr
+ * r3 contains the trap number
+ * r4 contains the faulting address
+ * r5 contains dsisr
+ * r6 msr
*
* at return r3 = 0 for success, 1 for page fault, negative for error
*/
- mr r4,r12
- ld r6,_DSISR(r1)
bl __hash_page /* build HPTE if possible */
cmpdi r3,0 /* see if __hash_page succeeded */
@@ -1732,24 +2284,23 @@
/* Error */
blt- 13f
- /* Reload DSISR into r4 for the DABR check below */
- ld r4,_DSISR(r1)
+ /* Reload DAR/DSISR into r4/r5 for the DABR check below */
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
#endif /* CONFIG_PPC_BOOK3S_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
-11: andis. r0,r4,DSISR_DABRMATCH@h
+11: andis. r0,r5,DSISR_DABRMATCH@h
bne- handle_dabr_fault
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
cmpdi r3,0
- beq+ 12f
+ beq+ ret_from_except_lite
bl save_nvgprs
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
- lwz r4,_DAR(r1)
+ ld r4,_DAR(r1)
bl bad_page_fault
b ret_from_except
@@ -1760,7 +2311,12 @@
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_break
-12: b ret_from_except_lite
+ /*
+ * do_break() may have changed the NV GPRS while handling a breakpoint.
+ * If so, we need to restore them with their updated values. Don't use
+ * ret_from_except_lite here.
+ */
+ b ret_from_except
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1783,74 +2339,12 @@
* the access, or panic if there isn't a handler.
*/
77: bl save_nvgprs
- mr r4,r3
addi r3,r1,STACK_FRAME_OVERHEAD
li r5,SIGSEGV
bl bad_page_fault
b ret_from_except
/*
- * Here we have detected that the kernel stack pointer is bad.
- * R9 contains the saved CR, r13 points to the paca,
- * r10 contains the (bad) kernel stack pointer,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * We switch to using an emergency stack, save the registers there,
- * and call kernel_bad_stack(), which panics.
- */
-bad_stack:
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,64+INT_FRAME_SIZE
- std r9,_CCR(r1)
- std r10,GPR1(r1)
- std r11,_NIP(r1)
- std r12,_MSR(r1)
- mfspr r11,SPRN_DAR
- mfspr r12,SPRN_DSISR
- std r11,_DAR(r1)
- std r12,_DSISR(r1)
- mflr r10
- mfctr r11
- mfxer r12
- std r10,_LINK(r1)
- std r11,_CTR(r1)
- std r12,_XER(r1)
- SAVE_GPR(0,r1)
- SAVE_GPR(2,r1)
- ld r10,EX_R3(r3)
- std r10,GPR3(r1)
- SAVE_GPR(4,r1)
- SAVE_4GPRS(5,r1)
- ld r9,EX_R9(r3)
- ld r10,EX_R10(r3)
- SAVE_2GPRS(9,r1)
- ld r9,EX_R11(r3)
- ld r10,EX_R12(r3)
- ld r11,EX_R13(r3)
- std r9,GPR11(r1)
- std r10,GPR12(r1)
- std r11,GPR13(r1)
-BEGIN_FTR_SECTION
- ld r10,EX_CFAR(r3)
- std r10,ORIG_GPR3(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
- SAVE_8GPRS(14,r1)
- SAVE_10GPRS(22,r1)
- lhz r12,PACA_TRAP_SAVE(r13)
- std r12,_TRAP(r1)
- addi r11,r1,INT_FRAME_SIZE
- std r11,0(r1)
- li r12,0
- std r12,0(r11)
- ld r2,PACATOC(r13)
- ld r11,exception_marker@toc(r2)
- std r12,RESULT(r1)
- std r11,STACK_FRAME_OVERHEAD-16(r1)
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl kernel_bad_stack
- b 1b
-_ASM_NOKPROBE_SYMBOL(bad_stack);
-
-/*
* When doorbell is triggered from system reset wakeup, the message is
* not cleared, so it would fire again when EE is enabled.
*
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index a711d22..ed59855 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Firmware Assisted dump: A robust mechanism to get reliable kernel crash
* dump with assistance from firmware. This approach does not use kexec,
@@ -6,20 +7,6 @@
* from phyp assisted dump implementation written by Linas Vepstas and
* Manish Ahuja
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright 2011 IBM Corporation
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
@@ -35,97 +22,128 @@
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
+#include <linux/cma.h>
+#include <linux/hugetlb.h>
#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
-#include <asm/rtas.h>
#include <asm/fadump.h>
+#include <asm/fadump-internal.h>
#include <asm/setup.h>
static struct fw_dump fw_dump;
-static struct fadump_mem_struct fdm;
-static const struct fadump_mem_struct *fdm_active;
+static void __init fadump_reserve_crash_area(u64 base);
+
+#ifndef CONFIG_PRESERVE_FA_DUMP
static DEFINE_MUTEX(fadump_mutex);
-struct fad_crash_memory_ranges *crash_memory_ranges;
-int crash_memory_ranges_size;
-int crash_mem_ranges;
-int max_crash_mem_ranges;
+struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
+struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 };
-/* Scan the Firmware Assisted dump configuration details. */
-int __init early_init_dt_scan_fw_dump(unsigned long node,
- const char *uname, int depth, void *data)
+#ifdef CONFIG_CMA
+static struct cma *fadump_cma;
+
+/*
+ * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
+ *
+ * This function initializes CMA area from fadump reserved memory.
+ * The total size of fadump reserved memory covers for boot memory size
+ * + cpu data size + hpte size and metadata.
+ * Initialize only the area equivalent to boot memory size for CMA use.
+ * The reamining portion of fadump reserved memory will be not given
+ * to CMA and pages for thoes will stay reserved. boot memory size is
+ * aligned per CMA requirement to satisy cma_init_reserved_mem() call.
+ * But for some reason even if it fails we still have the memory reservation
+ * with us and we can still continue doing fadump.
+ */
+int __init fadump_cma_init(void)
{
- const __be32 *sections;
- int i, num_sections;
- int size;
- const __be32 *token;
+ unsigned long long base, size;
+ int rc;
- if (depth != 1 || strcmp(uname, "rtas") != 0)
+ if (!fw_dump.fadump_enabled)
return 0;
/*
- * Check if Firmware Assisted dump is supported. if yes, check
- * if dump has been initiated on last reboot.
+ * Do not use CMA if user has provided fadump=nocma kernel parameter.
+ * Return 1 to continue with fadump old behaviour.
*/
- token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
- if (!token)
+ if (fw_dump.nocma)
return 1;
- fw_dump.fadump_supported = 1;
- fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token);
+ base = fw_dump.reserve_dump_area_start;
+ size = fw_dump.boot_memory_size;
- /*
- * The 'ibm,kernel-dump' rtas node is present only if there is
- * dump data waiting for us.
- */
- fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
- if (fdm_active)
- fw_dump.dump_active = 1;
+ if (!size)
+ return 0;
- /* Get the sizes required to store dump data for the firmware provided
- * dump sections.
- * For each dump section type supported, a 32bit cell which defines
- * the ID of a supported section followed by two 32 bit cells which
- * gives teh size of the section in bytes.
- */
- sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
- &size);
-
- if (!sections)
+ rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
+ if (rc) {
+ pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
+ /*
+ * Though the CMA init has failed we still have memory
+ * reservation with us. The reserved memory will be
+ * blocked from production system usage. Hence return 1,
+ * so that we can continue with fadump.
+ */
return 1;
-
- num_sections = size / (3 * sizeof(u32));
-
- for (i = 0; i < num_sections; i++, sections += 3) {
- u32 type = (u32)of_read_number(sections, 1);
-
- switch (type) {
- case FADUMP_CPU_STATE_DATA:
- fw_dump.cpu_state_data_size =
- of_read_ulong(§ions[1], 2);
- break;
- case FADUMP_HPTE_REGION:
- fw_dump.hpte_region_size =
- of_read_ulong(§ions[1], 2);
- break;
- }
}
+ /*
+ * So we now have successfully initialized cma area for fadump.
+ */
+ pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
+ "bytes of memory reserved for firmware-assisted dump\n",
+ cma_get_size(fadump_cma),
+ (unsigned long)cma_get_base(fadump_cma) >> 20,
+ fw_dump.reserve_dump_area_size);
return 1;
}
+#else
+static int __init fadump_cma_init(void) { return 1; }
+#endif /* CONFIG_CMA */
+
+/* Scan the Firmware Assisted dump configuration details. */
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ if (depth != 1)
+ return 0;
+
+ if (strcmp(uname, "rtas") == 0) {
+ rtas_fadump_dt_scan(&fw_dump, node);
+ return 1;
+ }
+
+ if (strcmp(uname, "ibm,opal") == 0) {
+ opal_fadump_dt_scan(&fw_dump, node);
+ return 1;
+ }
+
+ return 0;
+}
/*
* If fadump is registered, check if the memory provided
- * falls within boot memory area.
+ * falls within boot memory area and reserved memory area.
*/
-int is_fadump_boot_memory_area(u64 addr, ulong size)
+int is_fadump_memory_area(u64 addr, unsigned long size)
{
+ u64 d_start, d_end;
+
if (!fw_dump.dump_registered)
return 0;
- return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
+ if (!size)
+ return 0;
+
+ d_start = fw_dump.reserve_dump_area_start;
+ d_end = d_start + fw_dump.reserve_dump_area_size;
+ if (((addr + size) > d_start) && (addr <= d_end))
+ return 1;
+
+ return (addr <= fw_dump.boot_mem_top);
}
int should_fadump_crash(void)
@@ -141,40 +159,75 @@
}
/*
- * Returns 1, if there are no holes in boot memory area,
- * 0 otherwise.
+ * Returns true, if there are no holes in memory area between d_start to d_end,
+ * false otherwise.
*/
-static int is_boot_memory_area_contiguous(void)
+static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
struct memblock_region *reg;
- unsigned long tstart, tend;
- unsigned long start_pfn = PHYS_PFN(RMA_START);
- unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size);
- unsigned int ret = 0;
+ bool ret = false;
+ u64 start, end;
for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
- if (tstart < tend) {
- /* Memory hole from start_pfn to tstart */
- if (tstart > start_pfn)
+ start = max_t(u64, d_start, reg->base);
+ end = min_t(u64, d_end, (reg->base + reg->size));
+ if (d_start < end) {
+ /* Memory hole from d_start to start */
+ if (start > d_start)
break;
- if (tend == end_pfn) {
- ret = 1;
+ if (end == d_end) {
+ ret = true;
break;
}
- start_pfn = tend + 1;
+ d_start = end + 1;
}
}
return ret;
}
+/*
+ * Returns true, if there are no holes in boot memory area,
+ * false otherwise.
+ */
+bool is_fadump_boot_mem_contiguous(void)
+{
+ unsigned long d_start, d_end;
+ bool ret = false;
+ int i;
+
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ d_start = fw_dump.boot_mem_addr[i];
+ d_end = d_start + fw_dump.boot_mem_sz[i];
+
+ ret = is_fadump_mem_area_contiguous(d_start, d_end);
+ if (!ret)
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Returns true, if there are no holes in reserved memory area,
+ * false otherwise.
+ */
+bool is_fadump_reserved_mem_contiguous(void)
+{
+ u64 d_start, d_end;
+
+ d_start = fw_dump.reserve_dump_area_start;
+ d_end = d_start + fw_dump.reserve_dump_area_size;
+ return is_fadump_mem_area_contiguous(d_start, d_end);
+}
+
/* Print firmware assisted dump configurations for debugging purpose. */
static void fadump_show_config(void)
{
+ int i;
+
pr_debug("Support for firmware-assisted dump (fadump): %s\n",
(fw_dump.fadump_supported ? "present" : "no support"));
@@ -188,62 +241,13 @@
pr_debug("Dump section sizes:\n");
pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
- pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
-}
-
-static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
- unsigned long addr)
-{
- if (!fdm)
- return 0;
-
- memset(fdm, 0, sizeof(struct fadump_mem_struct));
- addr = addr & PAGE_MASK;
-
- fdm->header.dump_format_version = cpu_to_be32(0x00000001);
- fdm->header.dump_num_sections = cpu_to_be16(3);
- fdm->header.dump_status_flag = 0;
- fdm->header.offset_first_dump_section =
- cpu_to_be32((u32)offsetof(struct fadump_mem_struct, cpu_state_data));
-
- /*
- * Fields for disk dump option.
- * We are not using disk dump option, hence set these fields to 0.
- */
- fdm->header.dd_block_size = 0;
- fdm->header.dd_block_offset = 0;
- fdm->header.dd_num_blocks = 0;
- fdm->header.dd_offset_disk_path = 0;
-
- /* set 0 to disable an automatic dump-reboot. */
- fdm->header.max_time_auto = 0;
-
- /* Kernel dump sections */
- /* cpu state data section. */
- fdm->cpu_state_data.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->cpu_state_data.source_data_type = cpu_to_be16(FADUMP_CPU_STATE_DATA);
- fdm->cpu_state_data.source_address = 0;
- fdm->cpu_state_data.source_len = cpu_to_be64(fw_dump.cpu_state_data_size);
- fdm->cpu_state_data.destination_address = cpu_to_be64(addr);
- addr += fw_dump.cpu_state_data_size;
-
- /* hpte region section */
- fdm->hpte_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->hpte_region.source_data_type = cpu_to_be16(FADUMP_HPTE_REGION);
- fdm->hpte_region.source_address = 0;
- fdm->hpte_region.source_len = cpu_to_be64(fw_dump.hpte_region_size);
- fdm->hpte_region.destination_address = cpu_to_be64(addr);
- addr += fw_dump.hpte_region_size;
-
- /* RMA region section */
- fdm->rmr_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->rmr_region.source_data_type = cpu_to_be16(FADUMP_REAL_MODE_REGION);
- fdm->rmr_region.source_address = cpu_to_be64(RMA_START);
- fdm->rmr_region.source_len = cpu_to_be64(fw_dump.boot_memory_size);
- fdm->rmr_region.destination_address = cpu_to_be64(addr);
- addr += fw_dump.boot_memory_size;
-
- return addr;
+ pr_debug(" Boot memory size : %lx\n", fw_dump.boot_memory_size);
+ pr_debug(" Boot memory top : %llx\n", fw_dump.boot_mem_top);
+ pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ pr_debug("[%03d] base = %llx, size = %llx\n", i,
+ fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
+ }
}
/**
@@ -261,10 +265,10 @@
* that is required for a kernel to boot successfully.
*
*/
-static inline unsigned long fadump_calculate_reserve_size(void)
+static inline u64 fadump_calculate_reserve_size(void)
{
+ u64 base, size, bootmem_min;
int ret;
- unsigned long long base, size;
if (fw_dump.reserve_bootvar)
pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
@@ -314,7 +318,8 @@
if (memory_limit && size > memory_limit)
size = memory_limit;
- return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+ return (size > bootmem_min ? size : bootmem_min);
}
/*
@@ -335,51 +340,137 @@
size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
size = PAGE_ALIGN(size);
+
+ /* This is to hold kernel metadata on platforms that support it */
+ size += (fw_dump.ops->fadump_get_metadata_size ?
+ fw_dump.ops->fadump_get_metadata_size() : 0);
return size;
}
-static void __init fadump_reserve_crash_area(unsigned long base,
- unsigned long size)
+static int __init add_boot_mem_region(unsigned long rstart,
+ unsigned long rsize)
{
- struct memblock_region *reg;
- unsigned long mstart, mend, msize;
+ int i = fw_dump.boot_mem_regs_cnt++;
- for_each_memblock(memory, reg) {
- mstart = max_t(unsigned long, base, reg->base);
- mend = reg->base + reg->size;
- mend = min(base + size, mend);
-
- if (mstart < mend) {
- msize = mend - mstart;
- memblock_reserve(mstart, msize);
- pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
- (msize >> 20), mstart);
- }
+ if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) {
+ fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
+ return 0;
}
+
+ pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
+ i, rstart, (rstart + rsize));
+ fw_dump.boot_mem_addr[i] = rstart;
+ fw_dump.boot_mem_sz[i] = rsize;
+ return 1;
+}
+
+/*
+ * Firmware usually has a hard limit on the data it can copy per region.
+ * Honour that by splitting a memory range into multiple regions.
+ */
+static int __init add_boot_mem_regions(unsigned long mstart,
+ unsigned long msize)
+{
+ unsigned long rstart, rsize, max_size;
+ int ret = 1;
+
+ rstart = mstart;
+ max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
+ while (msize) {
+ if (msize > max_size)
+ rsize = max_size;
+ else
+ rsize = msize;
+
+ ret = add_boot_mem_region(rstart, rsize);
+ if (!ret)
+ break;
+
+ msize -= rsize;
+ rstart += rsize;
+ }
+
+ return ret;
+}
+
+static int __init fadump_get_boot_mem_regions(void)
+{
+ unsigned long base, size, cur_size, hole_size, last_end;
+ unsigned long mem_size = fw_dump.boot_memory_size;
+ struct memblock_region *reg;
+ int ret = 1;
+
+ fw_dump.boot_mem_regs_cnt = 0;
+
+ last_end = 0;
+ hole_size = 0;
+ cur_size = 0;
+ for_each_memblock(memory, reg) {
+ base = reg->base;
+ size = reg->size;
+ hole_size += (base - last_end);
+
+ if ((cur_size + size) >= mem_size) {
+ size = (mem_size - cur_size);
+ ret = add_boot_mem_regions(base, size);
+ break;
+ }
+
+ mem_size -= size;
+ cur_size += size;
+ ret = add_boot_mem_regions(base, size);
+ if (!ret)
+ break;
+
+ last_end = base + size;
+ }
+ fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
+
+ return ret;
}
int __init fadump_reserve_mem(void)
{
- unsigned long base, size, memory_boundary;
+ u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE;
+ bool is_memblock_bottom_up = memblock_bottom_up();
+ int ret = 1;
if (!fw_dump.fadump_enabled)
return 0;
if (!fw_dump.fadump_supported) {
- printk(KERN_INFO "Firmware-assisted dump is not supported on"
- " this hardware\n");
- fw_dump.fadump_enabled = 0;
- return 0;
+ pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
+ goto error_out;
}
+
/*
* Initialize boot memory size
* If dump is active then we have already calculated the size during
* first kernel.
*/
- if (fdm_active)
- fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
- else
- fw_dump.boot_memory_size = fadump_calculate_reserve_size();
+ if (!fw_dump.dump_active) {
+ fw_dump.boot_memory_size =
+ PAGE_ALIGN(fadump_calculate_reserve_size());
+#ifdef CONFIG_CMA
+ if (!fw_dump.nocma) {
+ align = FADUMP_CMA_ALIGNMENT;
+ fw_dump.boot_memory_size =
+ ALIGN(fw_dump.boot_memory_size, align);
+ }
+#endif
+
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+ if (fw_dump.boot_memory_size < bootmem_min) {
+ pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
+ fw_dump.boot_memory_size, bootmem_min);
+ goto error_out;
+ }
+
+ if (!fadump_get_boot_mem_regions()) {
+ pr_err("Too many holes in boot memory area to enable fadump\n");
+ goto error_out;
+ }
+ }
/*
* Calculate the memory boundary.
@@ -398,10 +489,13 @@
" dump, now %#016llx\n", memory_limit);
}
if (memory_limit)
- memory_boundary = memory_limit;
+ mem_boundary = memory_limit;
else
- memory_boundary = memblock_end_of_DRAM();
+ mem_boundary = memblock_end_of_DRAM();
+ base = fw_dump.boot_mem_top;
+ size = get_fadump_area_size();
+ fw_dump.reserve_dump_area_size = size;
if (fw_dump.dump_active) {
pr_info("Firmware-assisted dump is active.\n");
@@ -415,56 +509,55 @@
#endif
/*
* If last boot has crashed then reserve all the memory
- * above boot_memory_size so that we don't touch it until
+ * above boot memory size so that we don't touch it until
* dump is written to disk by userspace tool. This memory
- * will be released for general use once the dump is saved.
+ * can be released for general use by invalidating fadump.
*/
- base = fw_dump.boot_memory_size;
- size = memory_boundary - base;
- fadump_reserve_crash_area(base, size);
+ fadump_reserve_crash_area(base);
- fw_dump.fadumphdr_addr =
- be64_to_cpu(fdm_active->rmr_region.destination_address) +
- be64_to_cpu(fdm_active->rmr_region.source_len);
- pr_debug("fadumphdr_addr = %p\n",
- (void *) fw_dump.fadumphdr_addr);
+ pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
+ pr_debug("Reserve dump area start address: 0x%lx\n",
+ fw_dump.reserve_dump_area_start);
} else {
- size = get_fadump_area_size();
-
/*
* Reserve memory at an offset closer to bottom of the RAM to
- * minimize the impact of memory hot-remove operation. We can't
- * use memblock_find_in_range() here since it doesn't allocate
- * from bottom to top.
+ * minimize the impact of memory hot-remove operation.
*/
- for (base = fw_dump.boot_memory_size;
- base <= (memory_boundary - size);
- base += size) {
- if (memblock_is_region_memory(base, size) &&
- !memblock_is_region_reserved(base, size))
- break;
+ memblock_set_bottom_up(true);
+ base = memblock_find_in_range(base, mem_boundary, size, align);
+
+ /* Restore the previous allocation mode */
+ memblock_set_bottom_up(is_memblock_bottom_up);
+
+ if (!base) {
+ pr_err("Failed to find memory chunk for reservation!\n");
+ goto error_out;
}
- if ((base > (memory_boundary - size)) ||
- memblock_reserve(base, size)) {
- pr_err("Failed to reserve memory\n");
- return 0;
+ fw_dump.reserve_dump_area_start = base;
+
+ /*
+ * Calculate the kernel metadata address and register it with
+ * f/w if the platform supports.
+ */
+ if (fw_dump.ops->fadump_setup_metadata &&
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+ goto error_out;
+
+ if (memblock_reserve(base, size)) {
+ pr_err("Failed to reserve memory!\n");
+ goto error_out;
}
- pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
- "assisted dump (System RAM: %ldMB)\n",
- (unsigned long)(size >> 20),
- (unsigned long)(base >> 20),
- (unsigned long)(memblock_phys_mem_size() >> 20));
+ pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
+ (size >> 20), base, (memblock_phys_mem_size() >> 20));
+
+ ret = fadump_cma_init();
}
- fw_dump.reserve_dump_area_start = base;
- fw_dump.reserve_dump_area_size = size;
- return 1;
-}
-
-unsigned long __init arch_reserved_kernel_pages(void)
-{
- return memblock_reserved_size() / PAGE_SIZE;
+ return ret;
+error_out:
+ fw_dump.fadump_enabled = 0;
+ return 0;
}
/* Look for fadump= cmdline option. */
@@ -477,6 +570,10 @@
fw_dump.fadump_enabled = 1;
else if (strncmp(p, "off", 3) == 0)
fw_dump.fadump_enabled = 0;
+ else if (strncmp(p, "nocma", 5) == 0) {
+ fw_dump.fadump_enabled = 1;
+ fw_dump.nocma = 1;
+ }
return 0;
}
@@ -495,59 +592,6 @@
}
early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-static int register_fw_dump(struct fadump_mem_struct *fdm)
-{
- int rc, err;
- unsigned int wait_time;
-
- pr_debug("Registering for firmware-assisted kernel dump...\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_REGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
-
- } while (wait_time);
-
- err = -EIO;
- switch (rc) {
- default:
- pr_err("Failed to register. Unknown Error(%d).\n", rc);
- break;
- case -1:
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Hardware Error(%d).\n", rc);
- break;
- case -3:
- if (!is_boot_memory_area_contiguous())
- pr_err("Can't have holes in boot memory area while "
- "registering fadump\n");
-
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Parameter Error(%d).\n", rc);
- err = -EINVAL;
- break;
- case -9:
- printk(KERN_ERR "firmware-assisted kernel dump is already "
- " registered.");
- fw_dump.dump_registered = 1;
- err = -EEXIST;
- break;
- case 0:
- printk(KERN_INFO "firmware-assisted kernel dump registration"
- " is successful\n");
- fw_dump.dump_registered = 1;
- err = 0;
- break;
- }
- return err;
-}
-
void crash_fadump(struct pt_regs *regs, const char *str)
{
struct fadump_crash_info_header *fdh = NULL;
@@ -590,71 +634,10 @@
fdh->online_mask = *cpu_online_mask;
- /* Call ibm,os-term rtas call to trigger firmware assisted dump */
- rtas_os_term((char *)str);
+ fw_dump.ops->fadump_trigger(fdh, str);
}
-#define GPR_MASK 0xffffff0000000000
-static inline int fadump_gpr_index(u64 id)
-{
- int i = -1;
- char str[3];
-
- if ((id & GPR_MASK) == REG_ID("GPR")) {
- /* get the digits at the end */
- id &= ~GPR_MASK;
- id >>= 24;
- str[2] = '\0';
- str[1] = id & 0xff;
- str[0] = (id >> 8) & 0xff;
- sscanf(str, "%d", &i);
- if (i > 31)
- i = -1;
- }
- return i;
-}
-
-static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
- u64 reg_val)
-{
- int i;
-
- i = fadump_gpr_index(reg_id);
- if (i >= 0)
- regs->gpr[i] = (unsigned long)reg_val;
- else if (reg_id == REG_ID("NIA"))
- regs->nip = (unsigned long)reg_val;
- else if (reg_id == REG_ID("MSR"))
- regs->msr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CTR"))
- regs->ctr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("LR"))
- regs->link = (unsigned long)reg_val;
- else if (reg_id == REG_ID("XER"))
- regs->xer = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CR"))
- regs->ccr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DAR"))
- regs->dar = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DSISR"))
- regs->dsisr = (unsigned long)reg_val;
-}
-
-static struct fadump_reg_entry*
-fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
-{
- memset(regs, 0, sizeof(struct pt_regs));
-
- while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) {
- fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
- be64_to_cpu(reg_entry->reg_value));
- reg_entry++;
- }
- reg_entry++;
- return reg_entry;
-}
-
-static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
struct elf_prstatus prstatus;
@@ -669,7 +652,7 @@
return buf;
}
-static void fadump_update_elfcore_header(char *bufp)
+void fadump_update_elfcore_header(char *bufp)
{
struct elfhdr *elf;
struct elf_phdr *phdr;
@@ -681,7 +664,7 @@
phdr = (struct elf_phdr *)bufp;
if (phdr->p_type == PT_NOTE) {
- phdr->p_paddr = fw_dump.cpu_notes_buf;
+ phdr->p_paddr = __pa(fw_dump.cpu_notes_buf_vaddr);
phdr->p_offset = phdr->p_paddr;
phdr->p_filesz = fw_dump.cpu_notes_buf_size;
phdr->p_memsz = fw_dump.cpu_notes_buf_size;
@@ -689,228 +672,100 @@
return;
}
-static void *fadump_cpu_notes_buf_alloc(unsigned long size)
+static void *fadump_alloc_buffer(unsigned long size)
{
- void *vaddr;
+ unsigned long count, i;
struct page *page;
- unsigned long order, count, i;
+ void *vaddr;
- order = get_order(size);
- vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+ vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
if (!vaddr)
return NULL;
- count = 1 << order;
+ count = PAGE_ALIGN(size) / PAGE_SIZE;
page = virt_to_page(vaddr);
for (i = 0; i < count; i++)
- SetPageReserved(page + i);
+ mark_page_reserved(page + i);
return vaddr;
}
-static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
+static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
{
- struct page *page;
- unsigned long order, count, i;
-
- order = get_order(size);
- count = 1 << order;
- page = virt_to_page(vaddr);
- for (i = 0; i < count; i++)
- ClearPageReserved(page + i);
- __free_pages(page, order);
+ free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}
-/*
- * Read CPU state dump data and convert it into ELF notes.
- * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
- * used to access the data to allow for additional fields to be added without
- * affecting compatibility. Each list of registers for a CPU starts with
- * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
- * 8 Byte ASCII identifier and 8 Byte register value. The register entry
- * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
- * of register value. For more details refer to PAPR document.
- *
- * Only for the crashing cpu we ignore the CPU dump data and get exact
- * state from fadump crash info structure populated by first kernel at the
- * time of crash.
- */
-static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
+s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
{
- struct fadump_reg_save_area_header *reg_header;
- struct fadump_reg_entry *reg_entry;
- struct fadump_crash_info_header *fdh = NULL;
- void *vaddr;
- unsigned long addr;
- u32 num_cpus, *note_buf;
- struct pt_regs regs;
- int i, rc = 0, cpu = 0;
-
- if (!fdm->cpu_state_data.bytes_dumped)
- return -EINVAL;
-
- addr = be64_to_cpu(fdm->cpu_state_data.destination_address);
- vaddr = __va(addr);
-
- reg_header = vaddr;
- if (be64_to_cpu(reg_header->magic_number) != REGSAVE_AREA_MAGIC) {
- printk(KERN_ERR "Unable to read register save area.\n");
- return -ENOENT;
- }
- pr_debug("--------CPU State Data------------\n");
- pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
- pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
-
- vaddr += be32_to_cpu(reg_header->num_cpu_offset);
- num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
- pr_debug("NumCpus : %u\n", num_cpus);
- vaddr += sizeof(u32);
- reg_entry = (struct fadump_reg_entry *)vaddr;
-
/* Allocate buffer to hold cpu crash notes. */
fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
- note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
- if (!note_buf) {
- printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
- "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf_vaddr =
+ (unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
+ if (!fw_dump.cpu_notes_buf_vaddr) {
+ pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
+ fw_dump.cpu_notes_buf_size);
return -ENOMEM;
}
- fw_dump.cpu_notes_buf = __pa(note_buf);
- pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
- (num_cpus * sizeof(note_buf_t)), note_buf);
-
- if (fw_dump.fadumphdr_addr)
- fdh = __va(fw_dump.fadumphdr_addr);
-
- for (i = 0; i < num_cpus; i++) {
- if (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUSTRT")) {
- printk(KERN_ERR "Unable to read CPU state data\n");
- rc = -ENOENT;
- goto error_out;
- }
- /* Lower 4 bytes of reg_value contains logical cpu id */
- cpu = be64_to_cpu(reg_entry->reg_value) & FADUMP_CPU_ID_MASK;
- if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
- SKIP_TO_NEXT_CPU(reg_entry);
- continue;
- }
- pr_debug("Reading register data for cpu %d...\n", cpu);
- if (fdh && fdh->crashing_cpu == cpu) {
- regs = fdh->regs;
- note_buf = fadump_regs_to_elf_notes(note_buf, ®s);
- SKIP_TO_NEXT_CPU(reg_entry);
- } else {
- reg_entry++;
- reg_entry = fadump_read_registers(reg_entry, ®s);
- note_buf = fadump_regs_to_elf_notes(note_buf, ®s);
- }
- }
- final_note(note_buf);
-
- if (fdh) {
- pr_debug("Updating elfcore header (%llx) with cpu notes\n",
- fdh->elfcorehdr_addr);
- fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
- }
+ pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
+ fw_dump.cpu_notes_buf_size,
+ fw_dump.cpu_notes_buf_vaddr);
return 0;
+}
-error_out:
- fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
+void fadump_free_cpu_notes_buf(void)
+{
+ if (!fw_dump.cpu_notes_buf_vaddr)
+ return;
+
+ fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
+ fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf_vaddr = 0;
fw_dump.cpu_notes_buf_size = 0;
- return rc;
+}
+static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
+{
+ kfree(mrange_info->mem_ranges);
+ mrange_info->mem_ranges = NULL;
+ mrange_info->mem_ranges_sz = 0;
+ mrange_info->max_mem_ranges = 0;
}
/*
- * Validate and process the dump data stored by firmware before exporting
- * it through '/proc/vmcore'.
- */
-static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
-{
- struct fadump_crash_info_header *fdh;
- int rc = 0;
-
- if (!fdm_active || !fw_dump.fadumphdr_addr)
- return -EINVAL;
-
- /* Check if the dump data is valid. */
- if ((be16_to_cpu(fdm_active->header.dump_status_flag) == FADUMP_ERROR_FLAG) ||
- (fdm_active->cpu_state_data.error_flags != 0) ||
- (fdm_active->rmr_region.error_flags != 0)) {
- printk(KERN_ERR "Dump taken by platform is not valid\n");
- return -EINVAL;
- }
- if ((fdm_active->rmr_region.bytes_dumped !=
- fdm_active->rmr_region.source_len) ||
- !fdm_active->cpu_state_data.bytes_dumped) {
- printk(KERN_ERR "Dump taken by platform is incomplete\n");
- return -EINVAL;
- }
-
- /* Validate the fadump crash info header */
- fdh = __va(fw_dump.fadumphdr_addr);
- if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
- printk(KERN_ERR "Crash info header is not valid.\n");
- return -EINVAL;
- }
-
- rc = fadump_build_cpu_notes(fdm_active);
- if (rc)
- return rc;
-
- /*
- * We are done validating dump info and elfcore header is now ready
- * to be exported. set elfcorehdr_addr so that vmcore module will
- * export the elfcore header through '/proc/vmcore'.
- */
- elfcorehdr_addr = fdh->elfcorehdr_addr;
-
- return 0;
-}
-
-static void free_crash_memory_ranges(void)
-{
- kfree(crash_memory_ranges);
- crash_memory_ranges = NULL;
- crash_memory_ranges_size = 0;
- max_crash_mem_ranges = 0;
-}
-
-/*
- * Allocate or reallocate crash memory ranges array in incremental units
+ * Allocate or reallocate mem_ranges array in incremental units
* of PAGE_SIZE.
*/
-static int allocate_crash_memory_ranges(void)
+static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- struct fad_crash_memory_ranges *new_array;
+ struct fadump_memory_range *new_array;
u64 new_size;
- new_size = crash_memory_ranges_size + PAGE_SIZE;
- pr_debug("Allocating %llu bytes of memory for crash memory ranges\n",
- new_size);
+ new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
+ pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
+ new_size, mrange_info->name);
- new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL);
+ new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
if (new_array == NULL) {
- pr_err("Insufficient memory for setting up crash memory ranges\n");
- free_crash_memory_ranges();
+ pr_err("Insufficient memory for setting up %s memory ranges\n",
+ mrange_info->name);
+ fadump_free_mem_ranges(mrange_info);
return -ENOMEM;
}
- crash_memory_ranges = new_array;
- crash_memory_ranges_size = new_size;
- max_crash_mem_ranges = (new_size /
- sizeof(struct fad_crash_memory_ranges));
+ mrange_info->mem_ranges = new_array;
+ mrange_info->mem_ranges_sz = new_size;
+ mrange_info->max_mem_ranges = (new_size /
+ sizeof(struct fadump_memory_range));
return 0;
}
-static inline int fadump_add_crash_memory(unsigned long long base,
- unsigned long long end)
+static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
+ u64 base, u64 end)
{
- u64 start, size;
+ struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
bool is_adjacent = false;
+ u64 start, size;
if (base == end)
return 0;
@@ -919,38 +774,41 @@
* Fold adjacent memory ranges to bring down the memory ranges/
* PT_LOAD segments count.
*/
- if (crash_mem_ranges) {
- start = crash_memory_ranges[crash_mem_ranges - 1].base;
- size = crash_memory_ranges[crash_mem_ranges - 1].size;
+ if (mrange_info->mem_range_cnt) {
+ start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
+ size = mem_ranges[mrange_info->mem_range_cnt - 1].size;
if ((start + size) == base)
is_adjacent = true;
}
if (!is_adjacent) {
/* resize the array on reaching the limit */
- if (crash_mem_ranges == max_crash_mem_ranges) {
+ if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
int ret;
- ret = allocate_crash_memory_ranges();
+ ret = fadump_alloc_mem_ranges(mrange_info);
if (ret)
return ret;
+
+ /* Update to the new resized array */
+ mem_ranges = mrange_info->mem_ranges;
}
start = base;
- crash_memory_ranges[crash_mem_ranges].base = start;
- crash_mem_ranges++;
+ mem_ranges[mrange_info->mem_range_cnt].base = start;
+ mrange_info->mem_range_cnt++;
}
- crash_memory_ranges[crash_mem_ranges - 1].size = (end - start);
- pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
- (crash_mem_ranges - 1), start, end - 1, (end - start));
+ mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
+ pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
+ mrange_info->name, (mrange_info->mem_range_cnt - 1),
+ start, end - 1, (end - start));
return 0;
}
-static int fadump_exclude_reserved_area(unsigned long long start,
- unsigned long long end)
+static int fadump_exclude_reserved_area(u64 start, u64 end)
{
- unsigned long long ra_start, ra_end;
+ u64 ra_start, ra_end;
int ret = 0;
ra_start = fw_dump.reserve_dump_area_start;
@@ -958,18 +816,22 @@
if ((ra_start < end) && (ra_end > start)) {
if ((start < ra_start) && (end > ra_end)) {
- ret = fadump_add_crash_memory(start, ra_start);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ start, ra_start);
if (ret)
return ret;
- ret = fadump_add_crash_memory(ra_end, end);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ ra_end, end);
} else if (start < ra_start) {
- ret = fadump_add_crash_memory(start, ra_start);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ start, ra_start);
} else if (ra_end < end) {
- ret = fadump_add_crash_memory(ra_end, end);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ ra_end, end);
}
} else
- ret = fadump_add_crash_memory(start, end);
+ ret = fadump_add_mem_range(&crash_mrange_info, start, end);
return ret;
}
@@ -1014,36 +876,36 @@
static int fadump_setup_crash_memory_ranges(void)
{
struct memblock_region *reg;
- unsigned long long start, end;
- int ret;
+ u64 start, end;
+ int i, ret;
pr_debug("Setup crash memory ranges.\n");
- crash_mem_ranges = 0;
+ crash_mrange_info.mem_range_cnt = 0;
/*
- * add the first memory chunk (RMA_START through boot_memory_size) as
- * a separate memory chunk. The reason is, at the time crash firmware
- * will move the content of this memory chunk to different location
- * specified during fadump registration. We need to create a separate
- * program header for this chunk with the correct offset.
+ * Boot memory region(s) registered with firmware are moved to
+ * different location at the time of crash. Create separate program
+ * header(s) for this memory chunk(s) with the correct offset.
*/
- ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
- if (ret)
- return ret;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ start = fw_dump.boot_mem_addr[i];
+ end = start + fw_dump.boot_mem_sz[i];
+ ret = fadump_add_mem_range(&crash_mrange_info, start, end);
+ if (ret)
+ return ret;
+ }
for_each_memblock(memory, reg) {
- start = (unsigned long long)reg->base;
- end = start + (unsigned long long)reg->size;
+ start = (u64)reg->base;
+ end = start + (u64)reg->size;
/*
- * skip the first memory chunk that is already added (RMA_START
- * through boot_memory_size). This logic needs a relook if and
- * when RMA_START changes to a non-zero value.
+ * skip the memory chunk that is already added
+ * (0 through boot_memory_top).
*/
- BUILD_BUG_ON(RMA_START != 0);
- if (start < fw_dump.boot_memory_size) {
- if (end > fw_dump.boot_memory_size)
- start = fw_dump.boot_memory_size;
+ if (start < fw_dump.boot_mem_top) {
+ if (end > fw_dump.boot_mem_top)
+ start = fw_dump.boot_mem_top;
else
continue;
}
@@ -1064,17 +926,35 @@
*/
static inline unsigned long fadump_relocate(unsigned long paddr)
{
- if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
- return be64_to_cpu(fdm.rmr_region.destination_address) + paddr;
- else
- return paddr;
+ unsigned long raddr, rstart, rend, rlast, hole_size;
+ int i;
+
+ hole_size = 0;
+ rlast = 0;
+ raddr = paddr;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ rstart = fw_dump.boot_mem_addr[i];
+ rend = rstart + fw_dump.boot_mem_sz[i];
+ hole_size += (rstart - rlast);
+
+ if (paddr >= rstart && paddr < rend) {
+ raddr += fw_dump.boot_mem_dest_addr - hole_size;
+ break;
+ }
+
+ rlast = rend;
+ }
+
+ pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
+ return raddr;
}
static int fadump_create_elfcore_headers(char *bufp)
{
- struct elfhdr *elf;
+ unsigned long long raddr, offset;
struct elf_phdr *phdr;
- int i;
+ struct elfhdr *elf;
+ int i, j;
fadump_init_elfcore_header(bufp);
elf = (struct elfhdr *)bufp;
@@ -1117,12 +997,14 @@
(elf->e_phnum)++;
/* setup PT_LOAD sections. */
+ j = 0;
+ offset = 0;
+ raddr = fw_dump.boot_mem_addr[0];
+ for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
+ u64 mbase, msize;
- for (i = 0; i < crash_mem_ranges; i++) {
- unsigned long long mbase, msize;
- mbase = crash_memory_ranges[i].base;
- msize = crash_memory_ranges[i].size;
-
+ mbase = crash_mrange_info.mem_ranges[i].base;
+ msize = crash_mrange_info.mem_ranges[i].size;
if (!msize)
continue;
@@ -1132,13 +1014,17 @@
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = mbase;
- if (mbase == RMA_START) {
+ if (mbase == raddr) {
/*
- * The entire RMA region will be moved by firmware
- * to the specified destination_address. Hence set
- * the correct offset.
+ * The entire real memory region will be moved by
+ * firmware to the specified destination_address.
+ * Hence set the correct offset.
*/
- phdr->p_offset = be64_to_cpu(fdm.rmr_region.destination_address);
+ phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
+ if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
+ offset += fw_dump.boot_mem_sz[j];
+ raddr = fw_dump.boot_mem_addr[++j];
+ }
}
phdr->p_paddr = mbase;
@@ -1160,7 +1046,6 @@
if (!addr)
return 0;
- fw_dump.fadumphdr_addr = addr;
fdh = __va(addr);
addr += sizeof(struct fadump_crash_info_header);
@@ -1168,7 +1053,7 @@
fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
fdh->elfcorehdr_addr = addr;
/* We will set the crashing cpu id in crash_fadump() during crash. */
- fdh->crashing_cpu = CPU_UNKNOWN;
+ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
return addr;
}
@@ -1190,7 +1075,8 @@
if (ret)
return ret;
- addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len);
+ addr = fw_dump.fadumphdr_addr;
+
/* Initialize fadump crash info header. */
addr = init_fadump_header(addr);
vaddr = __va(addr);
@@ -1199,75 +1085,27 @@
fadump_create_elfcore_headers(vaddr);
/* register the future kernel dump with firmware. */
- return register_fw_dump(&fdm);
-}
-
-static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
-{
- int rc = 0;
- unsigned int wait_time;
-
- pr_debug("Un-register firmware-assisted dump\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_UNREGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
-
- if (rc) {
- printk(KERN_ERR "Failed to un-register firmware-assisted dump."
- " unexpected error(%d).\n", rc);
- return rc;
- }
- fw_dump.dump_registered = 0;
- return 0;
-}
-
-static int fadump_invalidate_dump(struct fadump_mem_struct *fdm)
-{
- int rc = 0;
- unsigned int wait_time;
-
- pr_debug("Invalidating firmware-assisted dump registration\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_INVALIDATE, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
-
- if (rc) {
- pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc);
- return rc;
- }
- fw_dump.dump_active = 0;
- fdm_active = NULL;
- return 0;
+ pr_debug("Registering for firmware-assisted kernel dump...\n");
+ return fw_dump.ops->fadump_register(&fw_dump);
}
void fadump_cleanup(void)
{
+ if (!fw_dump.fadump_supported)
+ return;
+
/* Invalidate the registration only if dump is active. */
if (fw_dump.dump_active) {
- init_fadump_mem_struct(&fdm,
- be64_to_cpu(fdm_active->cpu_state_data.destination_address));
- fadump_invalidate_dump(&fdm);
+ pr_debug("Invalidating firmware-assisted dump registration\n");
+ fw_dump.ops->fadump_invalidate(&fw_dump);
} else if (fw_dump.dump_registered) {
/* Un-register Firmware-assisted dump if it was registered. */
- fadump_unregister_dump(&fdm);
- free_crash_memory_ranges();
+ fw_dump.ops->fadump_unregister(&fw_dump);
+ fadump_free_mem_ranges(&crash_mrange_info);
}
+
+ if (fw_dump.ops->fadump_cleanup)
+ fw_dump.ops->fadump_cleanup(&fw_dump);
}
static void fadump_free_reserved_memory(unsigned long start_pfn,
@@ -1292,90 +1130,197 @@
/*
* Skip memory holes and free memory that was actually reserved.
*/
-static void fadump_release_reserved_area(unsigned long start, unsigned long end)
+static void fadump_release_reserved_area(u64 start, u64 end)
{
+ u64 tstart, tend, spfn, epfn;
struct memblock_region *reg;
- unsigned long tstart, tend;
- unsigned long start_pfn = PHYS_PFN(start);
- unsigned long end_pfn = PHYS_PFN(end);
+ spfn = PHYS_PFN(start);
+ epfn = PHYS_PFN(end);
for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
+ tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
+ tend = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend);
- if (tend == end_pfn)
+ if (tend == epfn)
break;
- start_pfn = tend + 1;
+ spfn = tend;
}
}
}
/*
- * Release the memory that was reserved in early boot to preserve the memory
- * contents. The released memory will be available for general use.
+ * Sort the mem ranges in-place and merge adjacent ranges
+ * to minimize the memory ranges count.
*/
-static void fadump_release_memory(unsigned long begin, unsigned long end)
+static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- unsigned long ra_start, ra_end;
+ struct fadump_memory_range *mem_ranges;
+ struct fadump_memory_range tmp_range;
+ u64 base, size;
+ int i, j, idx;
+
+ if (!reserved_mrange_info.mem_range_cnt)
+ return;
+
+ /* Sort the memory ranges */
+ mem_ranges = mrange_info->mem_ranges;
+ for (i = 0; i < mrange_info->mem_range_cnt; i++) {
+ idx = i;
+ for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
+ if (mem_ranges[idx].base > mem_ranges[j].base)
+ idx = j;
+ }
+ if (idx != i) {
+ tmp_range = mem_ranges[idx];
+ mem_ranges[idx] = mem_ranges[i];
+ mem_ranges[i] = tmp_range;
+ }
+ }
+
+ /* Merge adjacent reserved ranges */
+ idx = 0;
+ for (i = 1; i < mrange_info->mem_range_cnt; i++) {
+ base = mem_ranges[i-1].base;
+ size = mem_ranges[i-1].size;
+ if (mem_ranges[i].base == (base + size))
+ mem_ranges[idx].size += mem_ranges[i].size;
+ else {
+ idx++;
+ if (i == idx)
+ continue;
+
+ mem_ranges[idx] = mem_ranges[i];
+ }
+ }
+ mrange_info->mem_range_cnt = idx + 1;
+}
+
+/*
+ * Scan reserved-ranges to consider them while reserving/releasing
+ * memory for FADump.
+ */
+static inline int fadump_scan_reserved_mem_ranges(void)
+{
+ struct device_node *root;
+ const __be32 *prop;
+ int len, ret = -1;
+ unsigned long i;
+
+ root = of_find_node_by_path("/");
+ if (!root)
+ return ret;
+
+ prop = of_get_property(root, "reserved-ranges", &len);
+ if (!prop)
+ return ret;
+
+ /*
+ * Each reserved range is an (address,size) pair, 2 cells each,
+ * totalling 4 cells per range.
+ */
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+ u64 base, size;
+
+ base = of_read_number(prop + (i * 4) + 0, 2);
+ size = of_read_number(prop + (i * 4) + 2, 2);
+
+ if (size) {
+ ret = fadump_add_mem_range(&reserved_mrange_info,
+ base, base + size);
+ if (ret < 0) {
+ pr_warn("some reserved ranges are ignored!\n");
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Release the memory that was reserved during early boot to preserve the
+ * crash'ed kernel's memory contents except reserved dump area (permanent
+ * reservation) and reserved ranges used by F/W. The released memory will
+ * be available for general use.
+ */
+static void fadump_release_memory(u64 begin, u64 end)
+{
+ u64 ra_start, ra_end, tstart;
+ int i, ret;
+
+ fadump_scan_reserved_mem_ranges();
ra_start = fw_dump.reserve_dump_area_start;
ra_end = ra_start + fw_dump.reserve_dump_area_size;
/*
- * exclude the dump reserve area. Will reuse it for next
- * fadump registration.
+ * Add reserved dump area to reserved ranges list
+ * and exclude all these ranges while releasing memory.
*/
- if (begin < ra_end && end > ra_start) {
- if (begin < ra_start)
- fadump_release_reserved_area(begin, ra_start);
- if (end > ra_end)
- fadump_release_reserved_area(ra_end, end);
- } else
- fadump_release_reserved_area(begin, end);
+ ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
+ if (ret != 0) {
+ /*
+ * Not enough memory to setup reserved ranges but the system is
+ * running shortage of memory. So, release all the memory except
+ * Reserved dump area (reused for next fadump registration).
+ */
+ if (begin < ra_end && end > ra_start) {
+ if (begin < ra_start)
+ fadump_release_reserved_area(begin, ra_start);
+ if (end > ra_end)
+ fadump_release_reserved_area(ra_end, end);
+ } else
+ fadump_release_reserved_area(begin, end);
+
+ return;
+ }
+
+ /* Get the reserved ranges list in order first. */
+ sort_and_merge_mem_ranges(&reserved_mrange_info);
+
+ /* Exclude reserved ranges and release remaining memory */
+ tstart = begin;
+ for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
+ ra_start = reserved_mrange_info.mem_ranges[i].base;
+ ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;
+
+ if (tstart >= ra_end)
+ continue;
+
+ if (tstart < ra_start)
+ fadump_release_reserved_area(tstart, ra_start);
+ tstart = ra_end;
+ }
+
+ if (tstart < end)
+ fadump_release_reserved_area(tstart, end);
}
static void fadump_invalidate_release_mem(void)
{
- unsigned long reserved_area_start, reserved_area_end;
- unsigned long destination_address;
-
mutex_lock(&fadump_mutex);
if (!fw_dump.dump_active) {
mutex_unlock(&fadump_mutex);
return;
}
- destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
fadump_cleanup();
mutex_unlock(&fadump_mutex);
- /*
- * Save the current reserved memory bounds we will require them
- * later for releasing the memory for general use.
- */
- reserved_area_start = fw_dump.reserve_dump_area_start;
- reserved_area_end = reserved_area_start +
- fw_dump.reserve_dump_area_size;
- /*
- * Setup reserve_dump_area_start and its size so that we can
- * reuse this reserved memory for Re-registration.
- */
- fw_dump.reserve_dump_area_start = destination_address;
- fw_dump.reserve_dump_area_size = get_fadump_area_size();
+ fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
+ fadump_free_cpu_notes_buf();
- fadump_release_memory(reserved_area_start, reserved_area_end);
- if (fw_dump.cpu_notes_buf) {
- fadump_cpu_notes_buf_free(
- (unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
- fw_dump.cpu_notes_buf_size = 0;
- }
- /* Initialize the kernel dump memory structure for FAD registration. */
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ /*
+ * Setup kernel metadata and initialize the kernel dump
+ * memory structure for FADump re-registration.
+ */
+ if (fw_dump.ops->fadump_setup_metadata &&
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+ pr_warn("Failed to setup kernel metadata!\n");
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
static ssize_t fadump_release_memory_store(struct kobject *kobj,
@@ -1426,7 +1371,7 @@
int ret = 0;
int input = -1;
- if (!fw_dump.fadump_enabled || fdm_active)
+ if (!fw_dump.fadump_enabled || fw_dump.dump_active)
return -EPERM;
if (kstrtoint(buf, 0, &input))
@@ -1439,13 +1384,15 @@
if (fw_dump.dump_registered == 0) {
goto unlock_out;
}
+
/* Un-register Firmware-assisted dump */
- fadump_unregister_dump(&fdm);
+ pr_debug("Un-register firmware-assisted dump\n");
+ fw_dump.ops->fadump_unregister(&fw_dump);
break;
case 1:
if (fw_dump.dump_registered == 1) {
- ret = -EEXIST;
- goto unlock_out;
+ /* Un-register Firmware-assisted dump */
+ fw_dump.ops->fadump_unregister(&fw_dump);
}
/* Register Firmware-assisted dump */
ret = register_fadump();
@@ -1462,62 +1409,12 @@
static int fadump_region_show(struct seq_file *m, void *private)
{
- const struct fadump_mem_struct *fdm_ptr;
-
if (!fw_dump.fadump_enabled)
return 0;
mutex_lock(&fadump_mutex);
- if (fdm_active)
- fdm_ptr = fdm_active;
- else {
- mutex_unlock(&fadump_mutex);
- fdm_ptr = &fdm;
- }
-
- seq_printf(m,
- "CPU : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address),
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) +
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1,
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len),
- be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped));
- seq_printf(m,
- "HPTE: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->hpte_region.destination_address),
- be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
- be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->hpte_region.source_len),
- be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
- seq_printf(m,
- "DUMP: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->rmr_region.destination_address),
- be64_to_cpu(fdm_ptr->rmr_region.destination_address) +
- be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->rmr_region.source_len),
- be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
-
- if (!fdm_active ||
- (fw_dump.reserve_dump_area_start ==
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address)))
- goto out;
-
- /* Dump is active. Show reserved memory region. */
- seq_printf(m,
- " : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- (unsigned long long)fw_dump.reserve_dump_area_start,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
- fw_dump.reserve_dump_area_start,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
- fw_dump.reserve_dump_area_start);
-out:
- if (fdm_active)
- mutex_unlock(&fadump_mutex);
+ fw_dump.ops->fadump_region_show(&fw_dump, m);
+ mutex_unlock(&fadump_mutex);
return 0;
}
@@ -1531,17 +1428,7 @@
0644, fadump_register_show,
fadump_register_store);
-static int fadump_region_open(struct inode *inode, struct file *file)
-{
- return single_open(file, fadump_region_show, inode->i_private);
-}
-
-static const struct file_operations fadump_region_fops = {
- .open = fadump_region_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(fadump_region);
static void fadump_init_files(void)
{
@@ -1598,14 +1485,77 @@
* if dump process fails then invalidate the registration
* and release memory before proceeding for re-registration.
*/
- if (process_fadump(fdm_active) < 0)
+ if (fw_dump.ops->fadump_process(&fw_dump) < 0)
fadump_invalidate_release_mem();
}
/* Initialize the kernel dump memory structure for FAD registration. */
else if (fw_dump.reserve_dump_area_size)
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
+
fadump_init_files();
return 1;
}
subsys_initcall(setup_fadump);
+#else /* !CONFIG_PRESERVE_FA_DUMP */
+
+/* Scan the Firmware Assisted dump configuration details. */
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
+ return 0;
+
+ opal_fadump_dt_scan(&fw_dump, node);
+ return 1;
+}
+
+/*
+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
+ * preserve crash data. The subsequent memory preserving kernel boot
+ * is likely to process this crash data.
+ */
+int __init fadump_reserve_mem(void)
+{
+ if (fw_dump.dump_active) {
+ /*
+ * If last boot has crashed then reserve all the memory
+ * above boot memory to preserve crash data.
+ */
+ pr_info("Preserving crash data for processing in next boot.\n");
+ fadump_reserve_crash_area(fw_dump.boot_mem_top);
+ } else
+ pr_debug("FADump-aware kernel..\n");
+
+ return 1;
+}
+#endif /* CONFIG_PRESERVE_FA_DUMP */
+
+/* Preserve everything above the base address */
+static void __init fadump_reserve_crash_area(u64 base)
+{
+ struct memblock_region *reg;
+ u64 mstart, msize;
+
+ for_each_memblock(memory, reg) {
+ mstart = reg->base;
+ msize = reg->size;
+
+ if ((mstart + msize) < base)
+ continue;
+
+ if (mstart < base) {
+ msize -= (base - mstart);
+ mstart = base;
+ }
+
+ pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data",
+ (msize >> 20), mstart);
+ memblock_reserve(mstart, msize);
+ }
+}
+
+unsigned long __init arch_reserved_kernel_pages(void)
+{
+ return memblock_reserved_size() / PAGE_SIZE;
+}
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index 2eae447..cc4a5e3 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Extracted from cputable.c
*
@@ -6,11 +7,6 @@
* Modifications for ppc64:
* Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
* Copyright (C) 2005 Stephen Rothwell, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/export.h>
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 529dcc2..0bb991d 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* FPU support code, moved here from head.S so that it can be used
* by chips which use other head-whatever.S files.
@@ -6,12 +7,6 @@
* Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
* Copyright (C) 1996 Paul Mackerras.
* Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <asm/reg.h>
@@ -63,6 +58,7 @@
REST_32FPVSRS(0, R4, R3)
blr
EXPORT_SYMBOL(load_fp_state)
+_ASM_NOKPROBE_SYMBOL(load_fp_state); /* used by restore_math */
/*
* Store FP state into memory, including FPSCR
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 61ca279..4a24f8f 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -13,12 +14,6 @@
* This file contains the low-level support and setup for the
* PowerPC platform, including trap and interrupt dispatch.
* (The PPC 8xx embedded CPUs use head_8xx.S instead.)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/init.h>
@@ -37,7 +32,18 @@
#include <asm/export.h>
#include <asm/feature-fixups.h>
-/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
+#include "head_32.h"
+
+/* 601 only have IBAT */
+#ifdef CONFIG_PPC_BOOK3S_601
+#define LOAD_BAT(n, reg, RA, RB) \
+ li RA,0; \
+ mtspr SPRN_IBAT##n##U,RA; \
+ lwz RA,(n*16)+0(reg); \
+ lwz RB,(n*16)+4(reg); \
+ mtspr SPRN_IBAT##n##U,RA; \
+ mtspr SPRN_IBAT##n##L,RB
+#else
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
li RA,0; \
@@ -47,12 +53,11 @@
lwz RB,(n*16)+4(reg); \
mtspr SPRN_IBAT##n##U,RA; \
mtspr SPRN_IBAT##n##L,RB; \
- beq 1f; \
lwz RA,(n*16)+8(reg); \
lwz RB,(n*16)+12(reg); \
mtspr SPRN_DBAT##n##U,RA; \
- mtspr SPRN_DBAT##n##L,RB; \
-1:
+ mtspr SPRN_DBAT##n##L,RB
+#endif
__HEAD
.stabs "arch/powerpc/kernel/",N_SO,0,0,0f
@@ -160,6 +165,10 @@
bl flush_tlbs
bl initial_bats
+ bl load_segment_registers
+#ifdef CONFIG_KASAN
+ bl early_hash_table
+#endif
#if defined(CONFIG_BOOTX_TEXT)
bl setup_disp_bat
#endif
@@ -176,10 +185,10 @@
bl reloc_offset
li r24,0 /* cpu# */
bl call_setup_cpu /* Call setup_cpu for this CPU */
-#ifdef CONFIG_6xx
+#ifdef CONFIG_PPC_BOOK3S_32
bl reloc_offset
bl init_idle_6xx
-#endif /* CONFIG_6xx */
+#endif /* CONFIG_PPC_BOOK3S_32 */
/*
@@ -205,7 +214,7 @@
*/
turn_on_mmu:
mfmsr r0
- ori r0,r0,MSR_DR|MSR_IR
+ ori r0,r0,MSR_DR|MSR_IR|MSR_RI
mtspr SPRN_SRR1,r0
lis r0,start_here@h
ori r0,r0,start_here@l
@@ -242,103 +251,6 @@
__secondary_hold_acknowledge:
.long -1
-/*
- * Exception entry code. This code runs with address translation
- * turned off, i.e. using physical addresses.
- * We assume sprg3 has the physical address of the current
- * task's thread_struct.
- */
-#define EXCEPTION_PROLOG \
- mtspr SPRN_SPRG_SCRATCH0,r10; \
- mtspr SPRN_SPRG_SCRATCH1,r11; \
- mfcr r10; \
- EXCEPTION_PROLOG_1; \
- EXCEPTION_PROLOG_2
-
-#define EXCEPTION_PROLOG_1 \
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
- andi. r11,r11,MSR_PR; \
- tophys(r11,r1); /* use tophys(r1) if kernel */ \
- beq 1f; \
- mfspr r11,SPRN_SPRG_THREAD; \
- lwz r11,THREAD_INFO-THREAD(r11); \
- addi r11,r11,THREAD_SIZE; \
- tophys(r11,r11); \
-1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
-
-
-#define EXCEPTION_PROLOG_2 \
- stw r10,_CCR(r11); /* save registers */ \
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG_SCRATCH1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r12,SPRN_SRR0; \
- mfspr r9,SPRN_SRR1; \
- stw r1,GPR1(r11); \
- stw r1,0(r11); \
- tovirt(r1,r11); /* set new kernel sp */ \
- li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
- MTMSRD(r10); /* (except for mach check in rtas) */ \
- stw r0,GPR0(r11); \
- lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \
- addi r10,r10,STACK_FRAME_REGS_MARKER@l; \
- stw r10,8(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-/*
- * Note: code which follows this uses cr0.eq (set if from kernel),
- * r11, r12 (SRR0), and r9 (SRR1).
- *
- * Note2: once we have set r1 we are in a position to take exceptions
- * again, and we could thus set MSR:RI at that point.
- */
-
-/*
- * Exception vectors.
- */
-#define EXCEPTION(n, label, hdlr, xfer) \
- . = n; \
- DO_KVM n; \
-label: \
- EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- li r10,MSR_KERNEL; \
- copyee(r10, r9); \
- bl tfer; \
-i##n: \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
- ret_from_except)
-
/* System reset */
/* core99 pmac starts the seconary here by changing the vector, and
putting it back to what it was (unknown_exception) when done. */
@@ -352,9 +264,8 @@
* registers that might have bad values includes all the GPRs
* and all the BATs. We indicate that we are in RTAS by putting
* a non-zero value, the address of the exception frame to use,
- * in SPRG2. The machine check handler checks SPRG2 and uses its
- * value if it is non-zero. If we ever needed to free up SPRG2,
- * we could use a field in the thread_info or thread_struct instead.
+ * in thread.rtas_sp. The machine check handler checks thread.rtas_sp
+ * and uses its value if it is non-zero.
* (Other exception handlers assume that r1 is a valid kernel stack
* pointer when we take an exception from supervisor mode.)
* -- paulus.
@@ -365,16 +276,15 @@
mtspr SPRN_SPRG_SCRATCH1,r11
mfcr r10
#ifdef CONFIG_PPC_CHRP
- mfspr r11,SPRN_SPRG_RTAS
- cmpwi 0,r11,0
- bne 7f
+ mfspr r11, SPRN_SPRG_THREAD
+ lwz r11, RTAS_SP(r11)
+ cmpwi cr1, r11, 0
+ bne cr1, 7f
#endif /* CONFIG_PPC_CHRP */
EXCEPTION_PROLOG_1
7: EXCEPTION_PROLOG_2
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_CHRP
- mfspr r4,SPRN_SPRG_RTAS
- cmpwi cr1,r4,0
bne cr1,1f
#endif
EXC_XFER_STD(0x200, machine_check_exception)
@@ -389,11 +299,17 @@
EXCEPTION_PROLOG
mfspr r10,SPRN_DSISR
stw r10,_DSISR(r11)
+#ifdef CONFIG_PPC_KUAP
+ andis. r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
+#else
andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
+#endif
bne 1f /* if not, try to put a PTE */
mfspr r4,SPRN_DAR /* into the hash table */
rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
+BEGIN_MMU_FTR_SECTION
bl hash_page
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
1: lwz r5,_DSISR(r11) /* get DSISR value */
mfspr r4,SPRN_DAR
EXC_XFER_LITE(0x300, handle_page_fault)
@@ -408,7 +324,9 @@
beq 1f /* if so, try to put a PTE */
li r3,0 /* into the hash table */
mr r4,r12 /* SRR0 is fault address */
+BEGIN_MMU_FTR_SECTION
bl hash_page
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
1: mr r4,r12
andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
EXC_XFER_LITE(0x400, handle_page_fault)
@@ -426,7 +344,7 @@
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
+ EXC_XFER_STD(0x600, alignment_exception)
/* Program check exception */
EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
@@ -447,24 +365,23 @@
bl load_up_fpu /* if from user, just load it up */
b fast_exception_return
1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+ EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception)
/* Decrementer */
EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
/* System call */
. = 0xc00
DO_KVM 0xc00
SystemCall:
- EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
+ SYSCALL_ENTRY 0xc00
/* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
/*
* The Altivec unavailable trap is at 0x0f20. Foo.
@@ -496,18 +413,22 @@
*/
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_IMISS
+#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
lis r1,PAGE_OFFSET@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2,SPRN_SPRG_THREAD
- li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */
- lwz r2,PGDIR(r2)
+#endif
+ mfspr r2, SPRN_SPRG_PGDIR
+#ifdef CONFIG_SWAP
+ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+#else
+ li r1,_PAGE_PRESENT | _PAGE_EXEC
+#endif
+#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
bge- 112f
- mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
- lis r2,swapper_pg_dir@ha /* if kernel address, use */
- addi r2,r2,swapper_pg_dir@l /* kernel page table */
-112: tophys(r2,r2)
- rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
+#endif
+112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- InstructionAddressInvalid /* return if no mapping */
@@ -515,20 +436,10 @@
lwz r0,0(r2) /* get linux-style pte */
andc. r1,r1,r0 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
- ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
- /*
- * NOTE! We are assuming this is not an SMP system, otherwise
- * we would need to update the pte atomically with lwarx/stwcx.
- */
- stw r0,0(r2) /* update PTE (accessed bit) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
- and r1,r1,r2 /* writable if _RW and _DIRTY */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
- ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+ rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
+ ori r1, r1, 0xe06 /* clear out reserved bits */
+ andc r1, r0, r1 /* PP = user? 1 : 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -572,16 +483,16 @@
mfspr r3,SPRN_DMISS
lis r1,PAGE_OFFSET@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2,SPRN_SPRG_THREAD
- li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */
- lwz r2,PGDIR(r2)
+ mfspr r2, SPRN_SPRG_PGDIR
+#ifdef CONFIG_SWAP
+ li r1, _PAGE_PRESENT | _PAGE_ACCESSED
+#else
+ li r1, _PAGE_PRESENT
+#endif
bge- 112f
- mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
- lis r2,swapper_pg_dir@ha /* if kernel address, use */
- addi r2,r2,swapper_pg_dir@l /* kernel page table */
-112: tophys(r2,r2)
- rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
+112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- DataAddressInvalid /* return if no mapping */
@@ -589,20 +500,16 @@
lwz r0,0(r2) /* get linux-style pte */
andc. r1,r1,r0 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
/*
* NOTE! We are assuming this is not an SMP system, otherwise
* we would need to update the pte atomically with lwarx/stwcx.
*/
- stw r0,0(r2) /* update PTE (accessed bit) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
- and r1,r1,r2 /* writable if _RW and _DIRTY */
+ rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */
rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+ andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -656,16 +563,16 @@
mfspr r3,SPRN_DMISS
lis r1,PAGE_OFFSET@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2,SPRN_SPRG_THREAD
- li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */
- lwz r2,PGDIR(r2)
+ mfspr r2, SPRN_SPRG_PGDIR
+#ifdef CONFIG_SWAP
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
+#else
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
+#endif
bge- 112f
- mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
- lis r2,swapper_pg_dir@ha /* if kernel address, use */
- addi r2,r2,swapper_pg_dir@l /* kernel page table */
-112: tophys(r2,r2)
- rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
+112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- DataAddressInvalid /* return if no mapping */
@@ -673,16 +580,14 @@
lwz r0,0(r2) /* get linux-style pte */
andc. r1,r1,r0 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- ori r0,r0,_PAGE_ACCESSED|_PAGE_DIRTY
/*
* NOTE! We are assuming this is not an SMP system, otherwise
* we would need to update the pte atomically with lwarx/stwcx.
*/
- stw r0,0(r2) /* update PTE (accessed/dirty bits) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- li r1,0xe05 /* clear out reserved bits & PP lsb */
- andc r1,r0,r1 /* PP = user? 2: 0 */
+ rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
+ li r1,0xe06 /* clear out reserved bits & PP msb */
+ andc r1,r0,r1 /* PP = user? 1: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -708,35 +613,35 @@
#define altivec_assist_exception unknown_exception
#endif
- EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE)
- EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE)
+ EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD)
+ EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD)
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD)
EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE)
- EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD)
+ EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD)
. = 0x3000
@@ -748,7 +653,7 @@
b fast_exception_return
#endif /* CONFIG_ALTIVEC */
1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception)
+ EXC_XFER_LITE(0xf20, altivec_unavailable_exception)
PerformanceMonitor:
EXCEPTION_PROLOG
@@ -836,17 +741,17 @@
lis r3,-KERNELBASE@h
mr r4,r24
bl call_setup_cpu /* Call setup_cpu for this CPU */
-#ifdef CONFIG_6xx
+#ifdef CONFIG_PPC_BOOK3S_32
lis r3,-KERNELBASE@h
bl init_idle_6xx
-#endif /* CONFIG_6xx */
+#endif /* CONFIG_PPC_BOOK3S_32 */
- /* get current_thread_info and current */
- lis r1,secondary_ti@ha
- tophys(r1,r1)
- lwz r1,secondary_ti@l(r1)
- tophys(r2,r1)
- lwz r2,TI_TASK(r2)
+ /* get current's stack and current */
+ lis r2,secondary_current@ha
+ tophys(r2,r2)
+ lwz r2,secondary_current@l(r2)
+ tophys(r1,r2)
+ lwz r1,TASK_STACK(r1)
/* stack */
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
@@ -855,14 +760,16 @@
stw r0,0(r3)
/* load up the MMU */
+ bl load_segment_registers
bl load_up_mmu
/* ptr to phys current thread */
tophys(r4,r2)
addi r4,r4,THREAD /* phys address of our thread_struct */
mtspr SPRN_SPRG_THREAD,r4
- li r3,0
- mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
+ lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
+ ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
+ mtspr SPRN_SPRG_PGDIR, r4
/* enable MMU and jump to start_secondary */
li r4,MSR_KERNEL
@@ -880,20 +787,33 @@
/*
* Those generic dummy functions are kept for CPUs not
- * included in CONFIG_6xx
+ * included in CONFIG_PPC_BOOK3S_32
*/
-#if !defined(CONFIG_6xx)
+#if !defined(CONFIG_PPC_BOOK3S_32)
_ENTRY(__save_cpu_setup)
blr
_ENTRY(__restore_cpu_setup)
blr
-#endif /* !defined(CONFIG_6xx) */
-
+#endif /* !defined(CONFIG_PPC_BOOK3S_32) */
/*
* Load stuff into the MMU. Intended to be called with
* IR=0 and DR=0.
*/
+#ifdef CONFIG_KASAN
+early_hash_table:
+ sync /* Force all PTE updates to finish */
+ isync
+ tlbia /* Clear all TLB entries */
+ sync /* wait for tlbia/tlbie to finish */
+ TLBSYNC /* ... on all CPUs */
+ /* Load the SDR1 register (hash table base & size) */
+ lis r6, early_hash - PAGE_OFFSET@h
+ ori r6, r6, 3 /* 256kB table */
+ mtspr SPRN_SDR1, r6
+ blr
+#endif
+
load_up_mmu:
sync /* Force all PTE updates to finish */
isync
@@ -905,20 +825,9 @@
tophys(r6,r6)
lwz r6,_SDR1@l(r6)
mtspr SPRN_SDR1,r6
- li r0,16 /* load up segment register values */
- mtctr r0 /* for context 0 */
- lis r3,0x2000 /* Ku = 1, VSID = 0 */
- li r4,0
-3: mtsrin r3,r4
- addi r3,r3,0x111 /* increment VSID */
- addis r4,r4,0x1000 /* address of next segment */
- bdnz 3b
/* Load the BAT registers with the values set up by MMU_init.
MMU_init takes care of whether we're on a 601 or not. */
- mfpvr r3
- srwi r3,r3,16
- cmpwi r3,1
lis r3,BATS@ha
addi r3,r3,BATS@l
tophys(r3,r3)
@@ -934,6 +843,32 @@
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
+load_segment_registers:
+ li r0, NUM_USER_SEGMENTS /* load up user segment register values */
+ mtctr r0 /* for context 0 */
+ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
+#ifdef CONFIG_PPC_KUEP
+ oris r3, r3, SR_NX@h /* Set Nx */
+#endif
+#ifdef CONFIG_PPC_KUAP
+ oris r3, r3, SR_KS@h /* Set Ks */
+#endif
+ li r4, 0
+3: mtsrin r3, r4
+ addi r3, r3, 0x111 /* increment VSID */
+ addis r4, r4, 0x1000 /* address of next segment */
+ bdnz 3b
+ li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */
+ mtctr r0 /* for context 0 */
+ rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */
+ rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */
+ oris r3, r3, SR_KP@h /* Kp = 1 */
+3: mtsrin r3, r4
+ addi r3, r3, 0x111 /* increment VSID */
+ addis r4, r4, 0x1000 /* address of next segment */
+ bdnz 3b
+ blr
+
/*
* This is where the main kernel code starts.
*/
@@ -946,8 +881,9 @@
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
mtspr SPRN_SPRG_THREAD,r4
- li r3,0
- mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
+ lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
+ ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
+ mtspr SPRN_SPRG_PGDIR, r4
/* stack */
lis r1,init_thread_union@ha
@@ -958,11 +894,19 @@
* Do early platform-specific initialization,
* and set up the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
bl __save_cpu_setup
bl MMU_init
+#ifdef CONFIG_KASAN
+BEGIN_MMU_FTR_SECTION
+ bl MMU_init_hw_patch
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
/*
* Go back to running unmapped so we can load up new values
@@ -1014,19 +958,25 @@
blt- 4f
mulli r3,r3,897 /* multiply context by skew factor */
rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */
- addis r3,r3,0x6000 /* Set Ks, Ku bits */
+#ifdef CONFIG_PPC_KUEP
+ oris r3, r3, SR_NX@h /* Set Nx */
+#endif
+#ifdef CONFIG_PPC_KUAP
+ oris r3, r3, SR_KS@h /* Set Ks */
+#endif
li r0,NUM_USER_SEGMENTS
mtctr r0
+ lwz r4, MM_PGD(r4)
#ifdef CONFIG_BDI_SWITCH
/* Context switch the PTE pointer for the Abatron BDI2000.
* The PGDIR is passed as second argument.
*/
- lwz r4,MM_PGD(r4)
- lis r5, KERNELBASE@h
- lwz r5, 0xf0(r5)
- stw r4, 0x4(r5)
+ lis r5, abatron_pteptrs@ha
+ stw r4, abatron_pteptrs@l + 0x4(r5)
#endif
+ tophys(r4, r4)
+ mtspr SPRN_SPRG_PGDIR, r4
li r4,0
isync
3:
@@ -1053,11 +1003,8 @@
*/
clear_bats:
li r10,0
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi r9, 1
- beq 1f
+#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT0U,r10
mtspr SPRN_DBAT0L,r10
mtspr SPRN_DBAT1U,r10
@@ -1066,7 +1013,7 @@
mtspr SPRN_DBAT2L,r10
mtspr SPRN_DBAT3U,r10
mtspr SPRN_DBAT3L,r10
-1:
+#endif
mtspr SPRN_IBAT0U,r10
mtspr SPRN_IBAT0L,r10
mtspr SPRN_IBAT1U,r10
@@ -1101,6 +1048,41 @@
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
+_ENTRY(update_bats)
+ lis r4, 1f@h
+ ori r4, r4, 1f@l
+ tophys(r4, r4)
+ mfmsr r6
+ mflr r7
+ li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)
+ rlwinm r0, r6, 0, ~MSR_RI
+ rlwinm r0, r0, 0, ~MSR_EE
+ mtmsr r0
+ mtspr SPRN_SRR0, r4
+ mtspr SPRN_SRR1, r3
+ SYNC
+ RFI
+1: bl clear_bats
+ lis r3, BATS@ha
+ addi r3, r3, BATS@l
+ tophys(r3, r3)
+ LOAD_BAT(0, r3, r4, r5)
+ LOAD_BAT(1, r3, r4, r5)
+ LOAD_BAT(2, r3, r4, r5)
+ LOAD_BAT(3, r3, r4, r5)
+BEGIN_MMU_FTR_SECTION
+ LOAD_BAT(4, r3, r4, r5)
+ LOAD_BAT(5, r3, r4, r5)
+ LOAD_BAT(6, r3, r4, r5)
+ LOAD_BAT(7, r3, r4, r5)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+ li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI)
+ mtmsr r3
+ mtspr SPRN_SRR0, r7
+ mtspr SPRN_SRR1, r6
+ SYNC
+ RFI
+
flush_tlbs:
lis r10, 0x40
1: addic. r10, r10, -0x1000
@@ -1126,10 +1108,7 @@
*/
initial_bats:
lis r11,PAGE_OFFSET@h
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi 0,r9,1
- bne 4f
+#ifdef CONFIG_PPC_BOOK3S_601
ori r11,r11,4 /* set up BAT registers for 601 */
li r8,0x7f /* valid, block length = 8MB */
mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
@@ -1142,10 +1121,8 @@
addis r8,r8,0x800000@h
mtspr SPRN_IBAT2U,r11
mtspr SPRN_IBAT2L,r8
- isync
- blr
-
-4: tophys(r8,r11)
+#else
+ tophys(r8,r11)
#ifdef CONFIG_SMP
ori r8,r8,0x12 /* R/W access, M=1 */
#else
@@ -1157,10 +1134,10 @@
mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
mtspr SPRN_IBAT0L,r8
mtspr SPRN_IBAT0U,r11
+#endif
isync
blr
-
#ifdef CONFIG_BOOTX_TEXT
setup_disp_bat:
/*
@@ -1175,15 +1152,13 @@
beqlr
lwz r11,0(r8)
lwz r8,4(r8)
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi 0,r9,1
- beq 1f
+#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT3L,r8
mtspr SPRN_DBAT3U,r11
- blr
-1: mtspr SPRN_IBAT3L,r8
+#else
+ mtspr SPRN_IBAT3L,r8
mtspr SPRN_IBAT3U,r11
+#endif
blr
#endif /* CONFIG_BOOTX_TEXT */
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
new file mode 100644
index 0000000..8abc778
--- /dev/null
+++ b/arch/powerpc/kernel/head_32.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __HEAD_32_H__
+#define __HEAD_32_H__
+
+#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
+
+/*
+ * Exception entry code. This code runs with address translation
+ * turned off, i.e. using physical addresses.
+ * We assume sprg3 has the physical address of the current
+ * task's thread_struct.
+ */
+
+.macro EXCEPTION_PROLOG
+ mtspr SPRN_SPRG_SCRATCH0,r10
+ mtspr SPRN_SPRG_SCRATCH1,r11
+ mfcr r10
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2
+.endm
+
+.macro EXCEPTION_PROLOG_1
+ mfspr r11,SPRN_SRR1 /* check whether user or kernel */
+ andi. r11,r11,MSR_PR
+ tophys(r11,r1) /* use tophys(r1) if kernel */
+ beq 1f
+ mfspr r11,SPRN_SPRG_THREAD
+ lwz r11,TASK_STACK-THREAD(r11)
+ addi r11,r11,THREAD_SIZE
+ tophys(r11,r11)
+1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
+.endm
+
+.macro EXCEPTION_PROLOG_2
+ stw r10,_CCR(r11) /* save registers */
+ stw r12,GPR12(r11)
+ stw r9,GPR9(r11)
+ mfspr r10,SPRN_SPRG_SCRATCH0
+ stw r10,GPR10(r11)
+ mfspr r12,SPRN_SPRG_SCRATCH1
+ stw r12,GPR11(r11)
+ mflr r10
+ stw r10,_LINK(r11)
+ mfspr r12,SPRN_SRR0
+ mfspr r9,SPRN_SRR1
+ stw r1,GPR1(r11)
+ stw r1,0(r11)
+ tovirt(r1,r11) /* set new kernel sp */
+#ifdef CONFIG_40x
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#else
+ li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */
+ MTMSRD(r10) /* (except for mach check in rtas) */
+#endif
+ stw r0,GPR0(r11)
+ lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ addi r10,r10,STACK_FRAME_REGS_MARKER@l
+ stw r10,8(r11)
+ SAVE_4GPRS(3, r11)
+ SAVE_2GPRS(7, r11)
+.endm
+
+.macro SYSCALL_ENTRY trapno
+ mfspr r12,SPRN_SPRG_THREAD
+ mfcr r10
+ lwz r11,TASK_STACK-THREAD(r12)
+ mflr r9
+ addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE
+ rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
+ tophys(r11,r11)
+ stw r10,_CCR(r11) /* save registers */
+ mfspr r10,SPRN_SRR0
+ stw r9,_LINK(r11)
+ mfspr r9,SPRN_SRR1
+ stw r1,GPR1(r11)
+ stw r1,0(r11)
+ tovirt(r1,r11) /* set new kernel sp */
+ stw r10,_NIP(r11)
+#ifdef CONFIG_40x
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#else
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
+ MTMSRD(r10) /* (except for mach check in rtas) */
+#endif
+ lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ stw r2,GPR2(r11)
+ addi r10,r10,STACK_FRAME_REGS_MARKER@l
+ stw r9,_MSR(r11)
+ li r2, \trapno + 1
+ stw r10,8(r11)
+ stw r2,_TRAP(r11)
+ SAVE_GPR(0, r11)
+ SAVE_4GPRS(3, r11)
+ SAVE_2GPRS(7, r11)
+ addi r11,r1,STACK_FRAME_OVERHEAD
+ addi r2,r12,-THREAD
+ stw r11,PT_REGS(r12)
+#if defined(CONFIG_40x)
+ /* Check to see if the dbcr0 register is set up to debug. Use the
+ internal debug mode bit to do this. */
+ lwz r12,THREAD_DBCR0(r12)
+ andis. r12,r12,DBCR0_IDM@h
+#endif
+ ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+#if defined(CONFIG_40x)
+ beq+ 3f
+ /* From user and task is ptraced - load up global dbcr0 */
+ li r12,-1 /* clear all pending debug events */
+ mtspr SPRN_DBSR,r12
+ lis r11,global_dbcr0@ha
+ tophys(r11,r11)
+ addi r11,r11,global_dbcr0@l
+ lwz r12,0(r11)
+ mtspr SPRN_DBCR0,r12
+ lwz r12,4(r11)
+ addi r12,r12,-1
+ stw r12,4(r11)
+#endif
+
+3:
+ tovirt(r2, r2) /* set r2 to current */
+ lis r11, transfer_to_syscall@h
+ ori r11, r11, transfer_to_syscall@l
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * If MSR is changing we need to keep interrupts disabled at this point
+ * otherwise we might risk taking an interrupt before we tell lockdep
+ * they are enabled.
+ */
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL)
+ rlwimi r10, r9, 0, MSR_EE
+#else
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
+#endif
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
+ mtspr SPRN_NRI, r0
+#endif
+ mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR0,r11
+ SYNC
+ RFI /* jump to handler, enable MMU */
+.endm
+
+/*
+ * Note: code which follows this uses cr0.eq (set if from kernel),
+ * r11, r12 (SRR0), and r9 (SRR1).
+ *
+ * Note2: once we have set r1 we are in a position to take exceptions
+ * again, and we could thus set MSR:RI at that point.
+ */
+
+/*
+ * Exception vectors.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define START_EXCEPTION(n, label) \
+ . = n; \
+ DO_KVM n; \
+label:
+
+#else
+#define START_EXCEPTION(n, label) \
+ . = n; \
+label:
+
+#endif
+
+#define EXCEPTION(n, label, hdlr, xfer) \
+ START_EXCEPTION(n, label) \
+ EXCEPTION_PROLOG; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ xfer(n, hdlr)
+
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
+ li r10,trap; \
+ stw r10,_TRAP(r11); \
+ LOAD_REG_IMMEDIATE(r10, msr); \
+ bl tfer; \
+ .long hdlr; \
+ .long ret
+
+#define EXC_XFER_STD(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
+ ret_from_except)
+
+#endif /* __HEAD_32_H__ */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index b19d784..585ea19 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
* Initial PowerPC version.
@@ -18,17 +19,10 @@
* frank_rowand@mvista.com or source@mvista.com
* debbie_chu@mvista.com
*
- *
* Module name: head_4xx.S
*
* Description:
* Kernel execution entry point code.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/init.h>
@@ -44,6 +38,8 @@
#include <asm/export.h>
#include <asm/asm-405.h>
+#include "head_32.h"
+
/* As with the other PowerPC ports, it is expected that when code
* execution begins here, the following registers contain valid, yet
* optional, information:
@@ -99,46 +95,6 @@
.space 4
/*
- * Exception vector entry code. This code runs with address translation
- * turned off (i.e. using physical addresses). We assume SPRG_THREAD has
- * the physical address of the current task thread_struct.
- * Note that we have to have decremented r1 before we write to any fields
- * of the exception frame, since a critical interrupt could occur at any
- * time, and it will write to the area immediately below the current r1.
- */
-#define NORMAL_EXCEPTION_PROLOG \
- mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\
- mtspr SPRN_SPRG_SCRATCH1,r11; \
- mtspr SPRN_SPRG_SCRATCH2,r1; \
- mfcr r10; /* save CR in r10 for now */\
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
- beq 1f; \
- mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\
- addi r1,r1,THREAD_SIZE; \
-1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\
- tophys(r11,r1); \
- stw r10,_CCR(r11); /* save various registers */\
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG_SCRATCH1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH2; \
- mfspr r12,SPRN_SRR0; \
- stw r10,GPR1(r11); \
- mfspr r9,SPRN_SRR1; \
- stw r10,0(r11); \
- rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-/*
* Exception prolog for critical exceptions. This is a little different
* from the normal exception prolog above since a critical exception
* can potentially occur at any point during normal exception processing.
@@ -158,7 +114,7 @@
beq 1f; \
/* COMING FROM USER MODE */ \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+ lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\
1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\
tophys(r11,r11); \
stw r10,_CCR(r11); /* save various registers */\
@@ -177,6 +133,9 @@
tovirt(r1,r11); \
rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
stw r0,GPR0(r11); \
+ lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
+ addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
+ stw r10, 8(r11); \
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
@@ -196,53 +155,12 @@
/*
* Exception vectors.
*/
-#define START_EXCEPTION(n, label) \
- . = n; \
-label:
-
-#define EXCEPTION(n, label, hdlr, xfer) \
- START_EXCEPTION(n, label); \
- NORMAL_EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
#define CRITICAL_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(n, label); \
CRITICAL_EXCEPTION_PROLOG; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, \
- ret_from_crit_exc)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- lis r10,msr@h; \
- ori r10,r10,msr@l; \
- copyee(r10, r9); \
- bl tfer; \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
- ret_from_except)
-
+ crit_transfer_to_handler, ret_from_crit_exc)
/*
* 0x0100 - Critical Interrupt Exception
@@ -393,7 +311,7 @@
* This is caused by a fetch from non-execute or guarded pages.
*/
START_EXCEPTION(0x0400, InstructionAccess)
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
mr r4,r12 /* Pass SRR0 as arg2 */
li r5,0 /* Pass zero as arg3 */
EXC_XFER_LITE(0x400, handle_page_fault)
@@ -403,33 +321,32 @@
/* 0x0600 - Alignment Exception */
START_EXCEPTION(0x0600, Alignment)
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */
stw r4,_DEAR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
+ EXC_XFER_STD(0x600, alignment_exception)
/* 0x0700 - Program Exception */
START_EXCEPTION(0x0700, ProgramCheck)
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
mfspr r4,SPRN_ESR /* Grab the ESR and save it */
stw r4,_ESR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x700, program_check_exception)
- EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD)
/* 0x0C00 - System Call Exception */
START_EXCEPTION(0x0C00, SystemCall)
- NORMAL_EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
+ SYSCALL_ENTRY 0xc00
- EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD)
/* 0x1000 - Programmable Interval Timer (PIT) Exception */
. = 0x1000
@@ -646,25 +563,25 @@
mfspr r10, SPRN_SPRG_SCRATCH0
b InstructionAccess
- EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
#ifdef CONFIG_IBM405_ERR51
/* 405GP errata 51 */
START_EXCEPTION(0x1700, Trap_17)
b DTLBMiss
#else
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
#endif
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD)
/* Check for a single step debug exception while in an exception
* handler before state has been saved. This is to catch the case
@@ -726,11 +643,11 @@
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_TEMPLATE(DebugException, 0x2002, \
(MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+ crit_transfer_to_handler, ret_from_crit_exc)
/* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
Decrementer:
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
lis r0,TSR_PIS@h
mtspr SPRN_TSR,r0 /* Clear the PIT exception */
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -738,9 +655,9 @@
/* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
FITException:
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
addi r3,r1,STACK_FRAME_OVERHEAD;
- EXC_XFER_EE(0x1010, unknown_exception)
+ EXC_XFER_STD(0x1010, unknown_exception)
/* Watchdog Timer (WDT) Exception. (from 0x1020) */
WDTException:
@@ -748,15 +665,14 @@
addi r3,r1,STACK_FRAME_OVERHEAD;
EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2,
(MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
- NOCOPY, crit_transfer_to_handler,
- ret_from_crit_exc)
+ crit_transfer_to_handler, ret_from_crit_exc)
/*
* The other Data TLB exceptions bail out to this point
* if they can't resolve the lightweight TLB fault.
*/
DataAccess:
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
stw r5,_ESR(r11)
mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
@@ -848,6 +764,9 @@
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
@@ -953,9 +872,8 @@
/* Context switch the PTE pointer for the Abatron BDI2000.
* The PGDIR is the second parameter.
*/
- lis r5, KERNELBASE@h
- lwz r5, 0xf0(r5)
- stw r4, 0x4(r5)
+ lis r5, abatron_pteptrs@ha
+ stw r4, abatron_pteptrs@l + 0x4(r5)
#endif
sync
mtspr SPRN_PID,r3
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 37e4a7c..51dd01a 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Kernel execution entry point code.
*
@@ -21,11 +22,6 @@
* debbie_chu@mvista.com
* Copyright 2002-2005 MontaVista Software, Inc.
* PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/init.h>
@@ -40,6 +36,7 @@
#include <asm/ptrace.h>
#include <asm/synch.h>
#include <asm/export.h>
+#include <asm/code-patching-asm.h>
#include "head_booke.h"
@@ -202,6 +199,9 @@
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
@@ -277,16 +277,15 @@
FP_UNAVAILABLE_EXCEPTION
#else
EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
- FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+ FloatingPointUnavailable, unknown_exception, EXC_XFER_STD)
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
- EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+ SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
- AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+ AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
@@ -294,7 +293,7 @@
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
/* Watchdog Timer Interrupt */
/* TODO: Add watchdog support */
@@ -382,10 +381,9 @@
/* Increment, rollover, and store TLB index */
addi r13,r13,1
+ patch_site 0f, patch__tlb_44x_hwater_D
/* Compare with watermark (instruction gets patched) */
- .globl tlb_44x_patch_hwater_D
-tlb_44x_patch_hwater_D:
- cmpwi 0,r13,1 /* reserve entries */
+0: cmpwi 0,r13,1 /* reserve entries */
ble 5f
li r13,0
5:
@@ -478,10 +476,9 @@
/* Increment, rollover, and store TLB index */
addi r13,r13,1
+ patch_site 0f, patch__tlb_44x_hwater_I
/* Compare with watermark (instruction gets patched) */
- .globl tlb_44x_patch_hwater_I
-tlb_44x_patch_hwater_I:
- cmpwi 0,r13,1 /* reserve entries */
+0: cmpwi 0,r13,1 /* reserve entries */
ble 5f
li r13,0
5:
@@ -1020,10 +1017,10 @@
/* Now we can get our task struct and real stack pointer */
- /* Get current_thread_info and current */
- lis r1,secondary_ti@ha
- lwz r1,secondary_ti@l(r1)
- lwz r2,TI_TASK(r1)
+ /* Get current's stack and current */
+ lis r2,secondary_current@ha
+ lwz r2,secondary_current@l(r2)
+ lwz r1,TASK_STACK(r2)
/* Current stack pointer */
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 4898e94..ad79fdd 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -15,11 +16,6 @@
* This file contains the entry point for the 64-bit kernel along
* with some early initialization code common to all 64-bit powerpc
* variants.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/threads.h>
@@ -186,7 +182,8 @@
isync
bctr
#else
- BUG_OPCODE
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
#endif
CLOSE_FIXED_SECTION(first_256B)
@@ -639,7 +636,7 @@
sub r5,r5,r11
#else
/* just copy interrupts */
- LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
+ LOAD_REG_IMMEDIATE_SYM(r5, r11, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
#endif
b 5f
3:
@@ -801,21 +798,19 @@
/* Set thread priority to MEDIUM */
HMT_MEDIUM
- /* Initialize the kernel stack */
- LOAD_REG_ADDR(r3, current_set)
- sldi r28,r24,3 /* get current_set[cpu#] */
- ldx r14,r3,r28
- addi r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD
- std r14,PACAKSAVE(r13)
-
- /* Do early setup for that CPU (SLB and hash table pointer) */
+ /*
+ * Do early setup for this CPU, in particular initialising the MMU so we
+ * can turn it on below. This is a call to C, which is OK, we're still
+ * running on the emergency stack.
+ */
bl early_setup_secondary
/*
- * setup the new stack pointer, but *don't* use this until
- * translation is on.
+ * The primary has initialized our kernel stack for us in the paca, grab
+ * it and put it in r1. We must *not* use it until we turn on the MMU
+ * below, because it may not be inside the RMO.
*/
- mr r1, r14
+ ld r1, PACAKSAVE(r13)
/* Clear backchain so we get nice backtraces */
li r7,0
@@ -906,6 +901,7 @@
/*
* This is where the main kernel code starts.
*/
+__REF
start_here_multiplatform:
/* set up the TOC */
bl relative_toc
@@ -970,7 +966,9 @@
/* Restore parameters passed from prom_init/kexec */
mr r3,r31
- bl early_setup /* also sets r13 and SPRG_PACA */
+ LOAD_REG_ADDR(r12, DOTSYM(early_setup))
+ mtctr r12
+ bctrl /* also sets r13 and SPRG_PACA */
LOAD_REG_ADDR(r3, start_here_common)
ld r4,PACAKMSR(r13)
@@ -979,6 +977,7 @@
RFI
b . /* prevent speculative execution */
+ .previous
/* This is where all platforms converge execution */
start_here_common:
@@ -1000,7 +999,8 @@
bl start_kernel
/* Not reached */
- BUG_OPCODE
+ trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
/*
* We put a few things here that have to be page-aligned.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 81d4574..19f583e 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -11,15 +12,10 @@
*
* This file contains low-level support and setup for PowerPC 8xx
* embedded processors, including trap and interrupt dispatch.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/init.h>
+#include <linux/magic.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -31,6 +27,9 @@
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/export.h>
+#include <asm/code-patching-asm.h>
+
+#include "head_32.h"
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
/* By simply checking Address >= 0x80000000, we know if its a kernel address */
@@ -105,98 +104,22 @@
mtspr SPRN_SRR0,r0
rfi /* enables MMU */
-/*
- * Exception entry code. This code runs with address translation
- * turned off, i.e. using physical addresses.
- * We assume sprg3 has the physical address of the current
- * task's thread_struct.
- */
-#define EXCEPTION_PROLOG \
- mtspr SPRN_SPRG_SCRATCH0, r10; \
- mtspr SPRN_SPRG_SCRATCH1, r11; \
- mfcr r10; \
- EXCEPTION_PROLOG_1; \
- EXCEPTION_PROLOG_2
-#define EXCEPTION_PROLOG_1 \
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
- andi. r11,r11,MSR_PR; \
- tophys(r11,r1); /* use tophys(r1) if kernel */ \
- beq 1f; \
- mfspr r11,SPRN_SPRG_THREAD; \
- lwz r11,THREAD_INFO-THREAD(r11); \
- addi r11,r11,THREAD_SIZE; \
- tophys(r11,r11); \
-1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
+#ifdef CONFIG_PERF_EVENTS
+ .align 4
+ .globl itlb_miss_counter
+itlb_miss_counter:
+ .space 4
-#define EXCEPTION_PROLOG_2 \
- stw r10,_CCR(r11); /* save registers */ \
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG_SCRATCH1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r12,SPRN_SRR0; \
- mfspr r9,SPRN_SRR1; \
- stw r1,GPR1(r11); \
- stw r1,0(r11); \
- tovirt(r1,r11); /* set new kernel sp */ \
- li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
- mtmsr r10; \
- stw r0,GPR0(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
+ .globl dtlb_miss_counter
+dtlb_miss_counter:
+ .space 4
-/*
- * Note: code which follows this uses cr0.eq (set if from kernel),
- * r11, r12 (SRR0), and r9 (SRR1).
- *
- * Note2: once we have set r1 we are in a position to take exceptions
- * again, and we could thus set MSR:RI at that point.
- */
-
-/*
- * Exception vectors.
- */
-#define EXCEPTION(n, label, hdlr, xfer) \
- . = n; \
-label: \
- EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- li r10,MSR_KERNEL; \
- copyee(r10, r9); \
- bl tfer; \
-i##n: \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
- ret_from_except)
+ .globl instruction_counter
+instruction_counter:
+ .space 4
+#endif
/* System reset */
EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD)
@@ -240,7 +163,7 @@
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
+ EXC_XFER_STD(0x600, alignment_exception)
/* Program check exception */
EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
@@ -252,29 +175,39 @@
/* Decrementer */
EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
/* System call */
. = 0xc00
SystemCall:
- EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
+ SYSCALL_ENTRY 0xc00
/* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_STD)
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
*/
EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
+/* Called from DataStoreTLBMiss when perf TLB misses events are activated */
+#ifdef CONFIG_PERF_EVENTS
+ patch_site 0f, patch__dtlbmiss_perf
+0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, 1
+ stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ rfi
+#endif
+
. = 0x1100
/*
* For the MPC8xx, this is a software tablewalk to load the instruction
- * TLB. The task switch loads the M_TW register with the pointer to the first
+ * TLB. The task switch loads the M_TWB register with the pointer to the first
* level table.
* If we discover there is no second level table (value is zero) or if there
* is an invalid pte, we load that into the TLB, which causes another fault
@@ -284,185 +217,164 @@
*/
#ifdef CONFIG_8xx_CPU15
-#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr) \
- addi tmp, addr, PAGE_SIZE; \
- tlbie tmp; \
- addi tmp, addr, -PAGE_SIZE; \
- tlbie tmp
+#define INVALIDATE_ADJACENT_PAGES_CPU15(addr) \
+ addi addr, addr, PAGE_SIZE; \
+ tlbie addr; \
+ addi addr, addr, -(PAGE_SIZE << 1); \
+ tlbie addr; \
+ addi addr, addr, PAGE_SIZE
#else
-#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr)
+#define INVALIDATE_ADJACENT_PAGES_CPU15(addr)
#endif
InstructionTLBMiss:
mtspr SPRN_SPRG_SCRATCH0, r10
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
mtspr SPRN_SPRG_SCRATCH1, r11
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mtspr SPRN_SPRG_SCRATCH2, r12
#endif
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
mfspr r10, SPRN_SRR0 /* Get effective address of fault */
- INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
+ INVALIDATE_ADJACENT_PAGES_CPU15(r10)
+ mtspr SPRN_MD_EPN, r10
/* Only modules will cause ITLB Misses as we always
* pin the first 8MB of kernel memory */
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mfcr r12
-#endif
#ifdef ITLB_MISS_KERNEL
+ mfcr r11
#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
- andis. r11, r10, 0x8000 /* Address >= 0x80000000 */
+ cmpi cr0, r10, 0 /* Address >= 0x80000000 */
#else
- rlwinm r11, r10, 16, 0xfff8
- cmpli cr0, r11, PAGE_OFFSET@h
+ rlwinm r10, r10, 16, 0xfff8
+ cmpli cr0, r10, PAGE_OFFSET@h
#ifndef CONFIG_PIN_TLB_TEXT
- /* It is assumed that kernel code fits into the first 8M page */
-_ENTRY(ITLBMiss_cmp)
- cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+ /* It is assumed that kernel code fits into the first 32M */
+0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h
+ patch_site 0b, patch__itlbmiss_linmem_top
#endif
#endif
#endif
- mfspr r11, SPRN_M_TW /* Get level 1 table */
+ mfspr r10, SPRN_M_TWB /* Get level 1 table */
#ifdef ITLB_MISS_KERNEL
#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
- beq+ 3f
+ bge+ 3f
#else
blt+ 3f
#endif
#ifndef CONFIG_PIN_TLB_TEXT
blt cr7, ITLBMissLinear
#endif
- lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
+ rlwinm r10, r10, 0, 20, 31
+ oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
3:
#endif
- /* Insert level 1 index */
- rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
- lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */
+ lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
+ mtspr SPRN_MI_TWC, r10 /* Set segment attributes */
- /* Extract level 2 index */
- rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
-#ifdef CONFIG_HUGETLB_PAGE
- mtcr r11
- bt- 28, 10f /* bit 28 = Large page (8M) */
- bt- 29, 20f /* bit 29 = Large page (8M or 512k) */
-#endif
- rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
+ mtspr SPRN_MD_TWC, r10
+ mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
-4:
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mtcr r12
+#ifdef ITLB_MISS_KERNEL
+ mtcr r11
#endif
- /* Load the MI_TWC with the attributes for this "segment." */
- mtspr SPRN_MI_TWC, r11 /* Set segment attributes */
-
#ifdef CONFIG_SWAP
rlwinm r11, r10, 32-5, _PAGE_PRESENT
and r11, r11, r10
rlwimi r10, r11, 0, _PAGE_PRESENT
#endif
- li r11, RPN_PATTERN | 0x200
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 20 and 23 must be clear.
* Software indicator bits 22, 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
- rlwimi r11, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */
- rlwimi r10, r11, 0, 0x0ff0 /* Set 22, 24-27, clear 20,23 */
+ rlwimi r10, r10, 0, 0x0f00 /* Clear bits 20-23 */
+ rlwimi r10, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */
+ ori r10, r10, RPN_PATTERN | 0x200 /* Set 22 and 24-27 */
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
/* Restore registers */
-_ENTRY(itlb_miss_exit_1)
- mfspr r10, SPRN_SPRG_SCRATCH0
+0: mfspr r10, SPRN_SPRG_SCRATCH0
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
mfspr r11, SPRN_SPRG_SCRATCH1
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mfspr r12, SPRN_SPRG_SCRATCH2
#endif
rfi
+ patch_site 0b, patch__itlbmiss_exit_1
+
#ifdef CONFIG_PERF_EVENTS
-_ENTRY(itlb_miss_perf)
- lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
- lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
- addi r11, r11, 1
- stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
-#endif
+ patch_site 0f, patch__itlbmiss_perf
+0: lwz r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, 1
+ stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
mfspr r10, SPRN_SPRG_SCRATCH0
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
mfspr r11, SPRN_SPRG_SCRATCH1
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mfspr r12, SPRN_SPRG_SCRATCH2
#endif
rfi
-
-#ifdef CONFIG_HUGETLB_PAGE
-10: /* 8M pages */
-#ifdef CONFIG_PPC_16K_PAGES
- /* Extract level 2 index */
- rlwinm r10, r10, 32 - (PAGE_SHIFT_8M - PAGE_SHIFT), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29
- /* Add level 2 base */
- rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1
-#else
- /* Level 2 base */
- rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK
#endif
- lwz r10, 0(r10) /* Get the pte */
- b 4b
-20: /* 512k pages */
- /* Extract level 2 index */
- rlwinm r10, r10, 32 - (PAGE_SHIFT_512K - PAGE_SHIFT), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29
- /* Add level 2 base */
- rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
- lwz r10, 0(r10) /* Get the pte */
- b 4b
+#ifndef CONFIG_PIN_TLB_TEXT
+ITLBMissLinear:
+ mtcr r11
+#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23
+ patch_site 0f, patch__itlbmiss_linmem_top8
+
+ mfspr r10, SPRN_SRR0
+0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha
+ rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K
+ ori r11, r11, MI_PS512K | MI_SVALID
+ rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */
+#else
+ /* Set 8M byte page and mark it valid */
+ li r11, MI_PS8MEG | MI_SVALID
+ rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
+#endif
+ mtspr SPRN_MI_TWC, r11
+ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
+ _PAGE_PRESENT
+ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
+
+0: mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ rfi
+ patch_site 0b, patch__itlbmiss_exit_2
#endif
. = 0x1200
DataStoreTLBMiss:
mtspr SPRN_SPRG_SCRATCH0, r10
mtspr SPRN_SPRG_SCRATCH1, r11
- mtspr SPRN_SPRG_SCRATCH2, r12
- mfcr r12
+ mfcr r11
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
mfspr r10, SPRN_MD_EPN
- rlwinm r11, r10, 16, 0xfff8
- cmpli cr0, r11, PAGE_OFFSET@h
- mfspr r11, SPRN_M_TW /* Get level 1 table */
- blt+ 3f
- rlwinm r11, r10, 16, 0xfff8
+ rlwinm r10, r10, 16, 0xfff8
+ cmpli cr0, r10, PAGE_OFFSET@h
#ifndef CONFIG_PIN_TLB_IMMR
- cmpli cr0, r11, VIRT_IMMR_BASE@h
+ cmpli cr6, r10, VIRT_IMMR_BASE@h
#endif
-_ENTRY(DTLBMiss_cmp)
- cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h
+ patch_site 0b, patch__dtlbmiss_linmem_top
+
+ mfspr r10, SPRN_M_TWB /* Get level 1 table */
+ blt+ 3f
#ifndef CONFIG_PIN_TLB_IMMR
-_ENTRY(DTLBMiss_jmp)
- beq- DTLBMissIMMR
+0: beq- cr6, DTLBMissIMMR
+ patch_site 0b, patch__dtlbmiss_immr_jmp
#endif
blt cr7, DTLBMissLinear
- lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
+ rlwinm r10, r10, 0, 20, 31
+ oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
3:
-
- /* Insert level 1 index */
- rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
- lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */
-
- /* We have a pte table, so load fetch the pte from the table.
- */
- /* Extract level 2 index */
- rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
-#ifdef CONFIG_HUGETLB_PAGE
mtcr r11
- bt- 28, 10f /* bit 28 = Large page (8M) */
- bt- 29, 20f /* bit 29 = Large page (8M or 512k) */
-#endif
- rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
+ lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
+
+ mtspr SPRN_MD_TWC, r11
+ mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
-4:
- mtcr r12
/* Insert the Guarded flag into the TWC from the Linux PTE.
* It is bit 27 of both the Linux PTE and the TWC (at least
@@ -498,45 +410,67 @@
/* Restore registers */
mtspr SPRN_DAR, r11 /* Tag DAR */
-_ENTRY(dtlb_miss_exit_1)
- mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r12, SPRN_SPRG_SCRATCH2
- rfi
-#ifdef CONFIG_PERF_EVENTS
-_ENTRY(dtlb_miss_perf)
- lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
- lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
- addi r11, r11, 1
- stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
-#endif
- mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r12, SPRN_SPRG_SCRATCH2
- rfi
-#ifdef CONFIG_HUGETLB_PAGE
-10: /* 8M pages */
- /* Extract level 2 index */
-#ifdef CONFIG_PPC_16K_PAGES
- rlwinm r10, r10, 32 - (PAGE_SHIFT_8M - PAGE_SHIFT), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29
- /* Add level 2 base */
- rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1
+0: mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ rfi
+ patch_site 0b, patch__dtlbmiss_exit_1
+
+DTLBMissIMMR:
+ mtcr r11
+ /* Set 512k byte guarded page and mark it valid */
+ li r10, MD_PS512K | MD_GUARDED | MD_SVALID
+ mtspr SPRN_MD_TWC, r10
+ mfspr r10, SPRN_IMMR /* Get current IMMR */
+ rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */
+ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
+ _PAGE_PRESENT | _PAGE_NO_CACHE
+ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
+
+ li r11, RPN_PATTERN
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+
+0: mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ rfi
+ patch_site 0b, patch__dtlbmiss_exit_2
+
+DTLBMissLinear:
+ mtcr r11
+ rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
+#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_DATA_SHIFT < 23
+ patch_site 0f, patch__dtlbmiss_romem_top8
+
+0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha
+ rlwinm r11, r11, 0, 0xff800000
+ neg r10, r11
+ or r11, r11, r10
+ rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K
+ ori r11, r11, MI_PS512K | MI_SVALID
+ mfspr r10, SPRN_MD_EPN
+ rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */
#else
- /* Level 2 base */
- rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK
+ /* Set 8M byte page and mark it valid */
+ li r11, MD_PS8MEG | MD_SVALID
#endif
- lwz r10, 0(r10) /* Get the pte */
- b 4b
+ mtspr SPRN_MD_TWC, r11
+#ifdef CONFIG_STRICT_KERNEL_RWX
+ patch_site 0f, patch__dtlbmiss_romem_top
-20: /* 512k pages */
- /* Extract level 2 index */
- rlwinm r10, r10, 32 - (PAGE_SHIFT_512K - PAGE_SHIFT), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29
- /* Add level 2 base */
- rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
- lwz r10, 0(r10) /* Get the pte */
- b 4b
+0: subis r11, r10, 0
+ rlwimi r10, r11, 11, _PAGE_RO
#endif
+ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
+ _PAGE_PRESENT
+ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
+
+ li r11, RPN_PATTERN
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+
+0: mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ rfi
+ patch_site 0b, patch__dtlbmiss_exit_3
/* This is an instruction TLB error on the MPC8xx. This could be due
* to many reasons, such as executing guarded memory or illegal instruction
@@ -548,11 +482,11 @@
mr r4,r12
andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
andis. r10,r9,SRR1_ISI_NOPT@h
- beq+ 1f
+ beq+ .Litlbie
tlbie r4
-itlbie:
/* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
-1: EXC_XFER_LITE(0x400, handle_page_fault)
+.Litlbie:
+ EXC_XFER_LITE(0x400, handle_page_fault)
/* This is the data TLB error on the MPC8xx. This could be due to
* many reasons, including a dirty update to a pte. We bail out to
@@ -574,21 +508,21 @@
stw r5,_DSISR(r11)
mfspr r4,SPRN_DAR
andis. r10,r5,DSISR_NOHPTE@h
- beq+ 1f
+ beq+ .Ldtlbie
tlbie r4
-dtlbie:
-1: li r10,RPN_PATTERN
+.Ldtlbie:
+ li r10,RPN_PATTERN
mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
EXC_XFER_LITE(0x300, handle_page_fault)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
/* On the MPC8xx, these next four traps are used for development
* support of breakpoints and such. Someday I will get around to
@@ -600,8 +534,8 @@
mtspr SPRN_SPRG_SCRATCH1, r11
mfcr r10
mfspr r11, SPRN_SRR0
- cmplwi cr0, r11, (dtlbie - PAGE_OFFSET)@l
- cmplwi cr7, r11, (itlbie - PAGE_OFFSET)@l
+ cmplwi cr0, r11, (.Ldtlbie - PAGE_OFFSET)@l
+ cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l
beq- cr0, 11f
beq- cr7, 11f
EXCEPTION_PROLOG_1
@@ -610,7 +544,7 @@
mfspr r4,SPRN_BAR
stw r4,_DAR(r11)
mfspr r5,SPRN_DSISR
- EXC_XFER_EE(0x1c00, do_break)
+ EXC_XFER_STD(0x1c00, do_break)
11:
mtcr r10
mfspr r10, SPRN_SPRG_SCRATCH0
@@ -621,115 +555,54 @@
. = 0x1d00
InstructionBreakpoint:
mtspr SPRN_SPRG_SCRATCH0, r10
- mtspr SPRN_SPRG_SCRATCH1, r11
- lis r10, (instruction_counter - PAGE_OFFSET)@ha
- lwz r11, (instruction_counter - PAGE_OFFSET)@l(r10)
- addi r11, r11, -1
- stw r11, (instruction_counter - PAGE_OFFSET)@l(r10)
+ lwz r10, (instruction_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, -1
+ stw r10, (instruction_counter - PAGE_OFFSET)@l(0)
lis r10, 0xffff
ori r10, r10, 0x01
mtspr SPRN_COUNTA, r10
mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
rfi
#else
- EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
#endif
- EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
. = 0x2000
-/*
- * Bottom part of DataStoreTLBMiss handlers for IMMR area and linear RAM.
- * not enough space in the DataStoreTLBMiss area.
- */
-DTLBMissIMMR:
- mtcr r12
- /* Set 512k byte guarded page and mark it valid */
- li r10, MD_PS512K | MD_GUARDED | MD_SVALID
- mtspr SPRN_MD_TWC, r10
- mfspr r10, SPRN_IMMR /* Get current IMMR */
- rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
- _PAGE_PRESENT | _PAGE_NO_CACHE
- mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
-
- li r11, RPN_PATTERN
- mtspr SPRN_DAR, r11 /* Tag DAR */
-_ENTRY(dtlb_miss_exit_2)
- mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r12, SPRN_SPRG_SCRATCH2
- rfi
-
-DTLBMissLinear:
- mtcr r12
- /* Set 8M byte page and mark it valid */
- li r11, MD_PS8MEG | MD_SVALID
- mtspr SPRN_MD_TWC, r11
- rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
- _PAGE_PRESENT
- mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
-
- li r11, RPN_PATTERN
- mtspr SPRN_DAR, r11 /* Tag DAR */
-_ENTRY(dtlb_miss_exit_3)
- mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r12, SPRN_SPRG_SCRATCH2
- rfi
-
-#ifndef CONFIG_PIN_TLB_TEXT
-ITLBMissLinear:
- mtcr r12
- /* Set 8M byte page and mark it valid */
- li r11, MI_PS8MEG | MI_SVALID
- mtspr SPRN_MI_TWC, r11
- rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
- ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
- _PAGE_PRESENT
- mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
-
-_ENTRY(itlb_miss_exit_2)
- mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r12, SPRN_SPRG_SCRATCH2
- rfi
-#endif
-
/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
* by decoding the registers used by the dcbx instruction and adding them.
* DAR is set to the calculated address.
*/
- /* define if you don't want to use self modifying code */
-#define NO_SELF_MODIFYING_CODE
FixupDAR:/* Entry point for dcbx workaround. */
- mtspr SPRN_SPRG_SCRATCH2, r10
+ mtspr SPRN_M_TW, r10
/* fetch instruction from memory. */
mfspr r10, SPRN_SRR0
+ mtspr SPRN_MD_EPN, r10
rlwinm r11, r10, 16, 0xfff8
cmpli cr0, r11, PAGE_OFFSET@h
- mfspr r11, SPRN_M_TW /* Get level 1 table */
+ mfspr r11, SPRN_M_TWB /* Get level 1 table */
blt+ 3f
rlwinm r11, r10, 16, 0xfff8
-_ENTRY(FixupDAR_cmp)
- cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+
+0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+ patch_site 0b, patch__fixupdar_linmem_top
+
/* create physical page address from effective address */
tophys(r11, r10)
blt- cr7, 201f
- lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
- /* Insert level 1 index */
-3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
+ mfspr r11, SPRN_M_TWB /* Get level 1 table */
+ rlwinm r11, r11, 0, 20, 31
+ oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@ha
+3:
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */
+ mtspr SPRN_MD_TWC, r11
mtcr r11
+ mfspr r11, SPRN_MD_TWC
+ lwz r11, 0(r11) /* Get the pte */
bt 28,200f /* bit 28 = Large page (8M) */
bt 29,202f /* bit 29 = Large page (8M or 512K) */
- rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */
- /* Insert level 2 index */
- rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
- lwz r11, 0(r11) /* Get the pte */
/* concat physical page address(r11) and page offset(r10) */
rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31
201: lwz r11,0(r11)
@@ -748,26 +621,15 @@
beq+ 142f
cmpwi cr0, r10, 1964 /* Is icbi? */
beq+ 142f
-141: mfspr r10,SPRN_SPRG_SCRATCH2
+141: mfspr r10,SPRN_M_TW
b DARFixed /* Nope, go back to normal TLB processing */
- /* concat physical page address(r11) and page offset(r10) */
200:
-#ifdef CONFIG_PPC_16K_PAGES
- rlwinm r11, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1
- rlwimi r11, r10, 32 - (PAGE_SHIFT_8M - 2), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29
-#else
- rlwinm r11, r10, 0, ~HUGEPD_SHIFT_MASK
-#endif
- lwz r11, 0(r11) /* Get the pte */
/* concat physical page address(r11) and page offset(r10) */
rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31
b 201b
202:
- rlwinm r11, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
- rlwimi r11, r10, 32 - (PAGE_SHIFT_512K - 2), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29
- lwz r11, 0(r11) /* Get the pte */
/* concat physical page address(r11) and page offset(r10) */
rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31
b 201b
@@ -776,27 +638,6 @@
rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
mtspr SPRN_DSISR, r10
142: /* continue, it was a dcbx, dcbi instruction. */
-#ifndef NO_SELF_MODIFYING_CODE
- andis. r10,r11,0x1f /* test if reg RA is r0 */
- li r10,modified_instr@l
- dcbtst r0,r10 /* touch for store */
- rlwinm r11,r11,0,0,20 /* Zero lower 10 bits */
- oris r11,r11,640 /* Transform instr. to a "add r10,RA,RB" */
- ori r11,r11,532
- stw r11,0(r10) /* store add/and instruction */
- dcbf 0,r10 /* flush new instr. to memory. */
- icbi 0,r10 /* invalidate instr. cache line */
- mfspr r11, SPRN_SPRG_SCRATCH1 /* restore r11 */
- mfspr r10, SPRN_SPRG_SCRATCH0 /* restore r10 */
- isync /* Wait until new instr is loaded from memory */
-modified_instr:
- .space 4 /* this is where the add instr. is stored */
- bne+ 143f
- subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */
-143: mtdar r10 /* store faulting EA in DAR */
- mfspr r10,SPRN_SPRG_SCRATCH2
- b DARFixed /* Go back to normal TLB handling */
-#else
mfctr r10
mtdar r10 /* save ctr reg in DAR */
rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */
@@ -848,7 +689,7 @@
mfdar r11
mtctr r11 /* restore ctr reg from DAR */
mtdar r10 /* save fault EA to DAR */
- mfspr r10,SPRN_SPRG_SCRATCH2
+ mfspr r10,SPRN_M_TW
b DARFixed /* Go back to normal TLB handling */
/* special handling for r10,r11 since these are modified already */
@@ -860,7 +701,6 @@
add r10, r10, r11 /* add it */
mfctr r11 /* restore r11 */
b 151b
-#endif
/*
* This is where the main kernel code starts.
@@ -878,18 +718,24 @@
/* stack */
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
+ lis r0, STACK_END_MAGIC@h
+ ori r0, r0, STACK_END_MAGIC@l
+ stw r0, 0(r1)
li r0,0
stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
lis r6, swapper_pg_dir@ha
tophys(r6,r6)
- mtspr SPRN_M_TW, r6
+ mtspr SPRN_M_TWB, r6
bl early_init /* We have to do this with MMU on */
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
@@ -919,11 +765,12 @@
/* set up the PTE pointers for the Abatron bdiGDB.
*/
- tovirt(r6,r6)
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
stw r5, 0xf0(0) /* Must match your Abatron config file */
tophys(r5,r5)
+ lis r6, swapper_pg_dir@h
+ ori r6, r6, swapper_pg_dir@l
stw r6, 0(r5)
/* Now turn on the MMU for real! */
@@ -952,28 +799,11 @@
mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */
tlbia /* Invalidate all TLB entries */
-#ifdef CONFIG_PIN_TLB_TEXT
- lis r8, MI_RSV4I@h
- ori r8, r8, 0x1c00
-
- mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
-#endif
-
#ifdef CONFIG_PIN_TLB_DATA
oris r10, r10, MD_RSV4I@h
mtspr SPRN_MD_CTR, r10 /* Set data TLB control */
#endif
- /* Now map the lower 8 Meg into the ITLB. */
- lis r8, KERNELBASE@h /* Create vaddr for TLB */
- ori r8, r8, MI_EVALID /* Mark it valid */
- mtspr SPRN_MI_EPN, r8
- li r8, MI_PS8MEG /* Set 8M byte page */
- ori r8, r8, MI_SVALID /* Make it valid */
- mtspr SPRN_MI_TWC, r8
- li r8, MI_BOOTINIT /* Create RPN for address 0 */
- mtspr SPRN_MI_RPN, r8 /* Store TLB entry */
-
lis r8, MI_APG_INIT@h /* Set protection modes */
ori r8, r8, MI_APG_INIT@l
mtspr SPRN_MI_AP, r8
@@ -1003,6 +833,34 @@
mtspr SPRN_MD_RPN, r8
#endif
+ /* Now map the lower RAM (up to 32 Mbytes) into the ITLB. */
+#ifdef CONFIG_PIN_TLB_TEXT
+ lis r8, MI_RSV4I@h
+ ori r8, r8, 0x1c00
+#endif
+ li r9, 4 /* up to 4 pages of 8M */
+ mtctr r9
+ lis r9, KERNELBASE@h /* Create vaddr for TLB */
+ li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */
+ li r11, MI_BOOTINIT /* Create RPN for address 0 */
+ lis r12, _einittext@h
+ ori r12, r12, _einittext@l
+1:
+#ifdef CONFIG_PIN_TLB_TEXT
+ mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
+ addi r8, r8, 0x100
+#endif
+
+ ori r0, r9, MI_EVALID /* Mark it valid */
+ mtspr SPRN_MI_EPN, r0
+ mtspr SPRN_MI_TWC, r10
+ mtspr SPRN_MI_RPN, r11 /* Store TLB entry */
+ addis r9, r9, 0x80
+ addis r11, r11, 0x80
+
+ cmpl cr0, r9, r12
+ bdnzf gt, 1b
+
/* Since the cache is enabled according to the information we
* just loaded into the TLB, invalidate and enable the caches here.
* We should probably check/set other modes....later.
@@ -1055,19 +913,6 @@
/* Room for two PTE table poiners, usually the kernel and current user
* pointer to their respective root page table (pgdir).
*/
+ .globl abatron_pteptrs
abatron_pteptrs:
.space 8
-
-#ifdef CONFIG_PERF_EVENTS
- .globl itlb_miss_counter
-itlb_miss_counter:
- .space 4
-
- .globl dtlb_miss_counter
-dtlb_miss_counter:
- .space 4
-
- .globl instruction_counter
-instruction_counter:
- .space 4
-#endif
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index d0862a1..2ae635d 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -6,6 +6,8 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
+#ifdef __ASSEMBLY__
+
/*
* Macros used for common Book-e exception handling
*/
@@ -32,6 +34,16 @@
*/
#define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4))
+#ifdef CONFIG_PPC_FSL_BOOK3E
+#define BOOKE_CLEAR_BTB(reg) \
+START_BTB_FLUSH_SECTION \
+ BTB_FLUSH(reg) \
+END_BTB_FLUSH_SECTION
+#else
+#define BOOKE_CLEAR_BTB(reg)
+#endif
+
+
#define NORMAL_EXCEPTION_PROLOG(intno) \
mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
mfspr r10, SPRN_SPRG_THREAD; \
@@ -43,8 +55,9 @@
andi. r11, r11, MSR_PR; /* check whether user or kernel */\
mr r11, r1; \
beq 1f; \
+ BOOKE_CLEAR_BTB(r11) \
/* if from user, start at top of this thread's kernel stack */ \
- lwz r11, THREAD_INFO-THREAD(r10); \
+ lwz r11, TASK_STACK - THREAD(r10); \
ALLOC_STACK_FRAME(r11, THREAD_SIZE); \
1 : subi r11, r11, INT_FRAME_SIZE; /* Allocate exception frame */ \
stw r13, _CCR(r11); /* save various registers */ \
@@ -70,6 +83,101 @@
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
+.macro SYSCALL_ENTRY trapno intno srr1
+ mfspr r10, SPRN_SPRG_THREAD
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mtspr SPRN_SPRG_WSCRATCH0, r10
+ stw r11, THREAD_NORMSAVE(0)(r10)
+ stw r13, THREAD_NORMSAVE(2)(r10)
+ mfcr r13 /* save CR in r13 for now */
+ mfspr r11, SPRN_SRR1
+ mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */
+ bf 3, 1975f
+ b kvmppc_handler_\intno\()_\srr1
+1975:
+ mr r12, r13
+ lwz r13, THREAD_NORMSAVE(2)(r10)
+FTR_SECTION_ELSE
+#endif
+ mfcr r12
+#ifdef CONFIG_KVM_BOOKE_HV
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
+#endif
+ BOOKE_CLEAR_BTB(r11)
+ lwz r11, TASK_STACK - THREAD(r10)
+ rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
+ ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
+ stw r12, _CCR(r11) /* save various registers */
+ mflr r12
+ stw r12,_LINK(r11)
+ mfspr r12,SPRN_SRR0
+ stw r1, GPR1(r11)
+ mfspr r9,SPRN_SRR1
+ stw r1, 0(r11)
+ mr r1, r11
+ stw r12,_NIP(r11)
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+ lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ stw r2,GPR2(r11)
+ addi r12, r12, STACK_FRAME_REGS_MARKER@l
+ stw r9,_MSR(r11)
+ li r2, \trapno + 1
+ stw r12, 8(r11)
+ stw r2,_TRAP(r11)
+ SAVE_GPR(0, r11)
+ SAVE_4GPRS(3, r11)
+ SAVE_2GPRS(7, r11)
+
+ addi r11,r1,STACK_FRAME_OVERHEAD
+ addi r2,r10,-THREAD
+ stw r11,PT_REGS(r10)
+ /* Check to see if the dbcr0 register is set up to debug. Use the
+ internal debug mode bit to do this. */
+ lwz r12,THREAD_DBCR0(r10)
+ andis. r12,r12,DBCR0_IDM@h
+ ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+ beq+ 3f
+ /* From user and task is ptraced - load up global dbcr0 */
+ li r12,-1 /* clear all pending debug events */
+ mtspr SPRN_DBSR,r12
+ lis r11,global_dbcr0@ha
+ tophys(r11,r11)
+ addi r11,r11,global_dbcr0@l
+#ifdef CONFIG_SMP
+ lwz r10, TASK_CPU(r2)
+ slwi r10, r10, 3
+ add r11, r11, r10
+#endif
+ lwz r12,0(r11)
+ mtspr SPRN_DBCR0,r12
+ lwz r12,4(r11)
+ addi r12,r12,-1
+ stw r12,4(r11)
+
+3:
+ tovirt(r2, r2) /* set r2 to current */
+ lis r11, transfer_to_syscall@h
+ ori r11, r11, transfer_to_syscall@l
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * If MSR is changing we need to keep interrupts disabled at this point
+ * otherwise we might risk taking an interrupt before we tell lockdep
+ * they are enabled.
+ */
+ lis r10, MSR_KERNEL@h
+ ori r10, r10, MSR_KERNEL@l
+ rlwimi r10, r9, 0, MSR_EE
+#else
+ lis r10, (MSR_KERNEL | MSR_EE)@h
+ ori r10, r10, (MSR_KERNEL | MSR_EE)@l
+#endif
+ mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR0,r11
+ SYNC
+ RFI /* jump to handler, enable MMU */
+.endm
+
/* To handle the additional exception priority levels on 40x and Book-E
* processors we allocate a stack per additional priority level.
*
@@ -128,9 +236,10 @@
stw r9,_CCR(r8); /* save CR on stack */\
mfspr r11,exc_level_srr1; /* check whether user or kernel */\
DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \
+ BOOKE_CLEAR_BTB(r10) \
andi. r11,r11,MSR_PR; \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+ lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\
addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
beq 1f; \
/* COMING FROM USER MODE */ \
@@ -143,13 +252,7 @@
stw r10,GPR11(r11); \
b 2f; \
/* COMING FROM PRIV MODE */ \
-1: lwz r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r11); \
- lwz r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r11); \
- stw r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r8); \
- stw r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r8); \
- lwz r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r11); \
- stw r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r8); \
- mr r11,r8; \
+1: mr r11, r8; \
2: mfspr r8,SPRN_SPRG_RSCRATCH_##exc_level; \
stw r12,GPR12(r11); /* save various registers */\
mflr r10; \
@@ -211,8 +314,7 @@
CRITICAL_EXCEPTION_PROLOG(intno); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, \
- ret_from_crit_exc)
+ crit_transfer_to_handler, ret_from_crit_exc)
#define MCHECK_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(label); \
@@ -221,36 +323,23 @@
stw r5,_ESR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, mcheck_transfer_to_handler, \
- ret_from_mcheck_exc)
+ mcheck_transfer_to_handler, ret_from_mcheck_exc)
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
li r10,trap; \
stw r10,_TRAP(r11); \
lis r10,msr@h; \
ori r10,r10,msr@l; \
- copyee(r10, r9); \
bl tfer; \
.long hdlr; \
.long ret
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
+ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
ret_from_except_full)
#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
+ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
ret_from_except)
/* Check for a single step debug exception while in an exception
@@ -317,7 +406,7 @@
/* continue normal handling for a debug exception... */ \
2: mfspr r4,SPRN_DBSR; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc)
+ EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc)
#define DEBUG_CRIT_EXCEPTION \
START_EXCEPTION(DebugCrit); \
@@ -370,7 +459,7 @@
/* continue normal handling for a critical exception... */ \
2: mfspr r4,SPRN_DBSR; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+ EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc)
#define DATA_STORAGE_EXCEPTION \
START_EXCEPTION(DataStorage) \
@@ -395,7 +484,7 @@
mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \
stw r4,_DEAR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_EE(0x0600, alignment_exception)
+ EXC_XFER_STD(0x0600, alignment_exception)
#define PROGRAM_EXCEPTION \
START_EXCEPTION(Program) \
@@ -420,9 +509,9 @@
bl load_up_fpu; /* if from user, just load it up */ \
b fast_exception_return; \
1: addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+ EXC_XFER_STD(0x800, kernel_fp_unavailable_exception)
-#ifndef __ASSEMBLY__
+#else /* __ASSEMBLY__ */
struct exception_regs {
unsigned long mas0;
unsigned long mas1;
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index e2750b8..adf0505 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Kernel execution entry point code.
*
@@ -23,11 +24,6 @@
* PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
* Copyright 2004 Freescale Semiconductor, Inc
* PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/init.h>
@@ -194,13 +190,6 @@
#endif
mtspr SPRN_MAS4, r2
-#if 0
- /* Enable DOZE */
- mfspr r2,SPRN_HID0
- oris r2,r2,HID0_DOZE@h
- mtspr SPRN_HID0, r2
-#endif
-
#if !defined(CONFIG_BDI_SWITCH)
/*
* The Abatron BDI JTAG debugger does not tolerate others
@@ -243,8 +232,9 @@
li r0,0
stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
- CURRENT_THREAD_INFO(r22, r1)
- stw r24, TI_CPU(r22)
+#ifdef CONFIG_SMP
+ stw r24, TASK_CPU(r2)
+#endif
bl early_init
@@ -274,6 +264,9 @@
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
mr r3,r30
mr r4,r31
bl machine_init
@@ -386,7 +379,7 @@
EXC_XFER_LITE(0x0300, handle_page_fault)
1:
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x0300, CacheLockingException)
+ EXC_XFER_LITE(0x0300, CacheLockingException)
/* Instruction Storage Interrupt */
INSTRUCTION_STORAGE_EXCEPTION
@@ -407,21 +400,20 @@
#ifdef CONFIG_E200
/* E200 treats 'normal' floating point instructions as FP Unavail exception */
EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
- program_check_exception, EXC_XFER_EE)
+ program_check_exception, EXC_XFER_STD)
#else
EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
#endif
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG(SYSCALL)
- EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+ SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
@@ -429,7 +421,7 @@
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
/* Watchdog Timer Interrupt */
#ifdef CONFIG_BOOKE_WDT
@@ -453,6 +445,13 @@
mfcr r13
stw r13, THREAD_NORMSAVE(3)(r10)
DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1
+START_BTB_FLUSH_SECTION
+ mfspr r11, SPRN_SRR1
+ andi. r10,r11,MSR_PR
+ beq 1f
+ BTB_FLUSH(r10)
+1:
+END_BTB_FLUSH_SECTION
mfspr r10, SPRN_DEAR /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
@@ -547,6 +546,14 @@
mfcr r13
stw r13, THREAD_NORMSAVE(3)(r10)
DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1
+START_BTB_FLUSH_SECTION
+ mfspr r11, SPRN_SRR1
+ andi. r10,r11,MSR_PR
+ beq 1f
+ BTB_FLUSH(r10)
+1:
+END_BTB_FLUSH_SECTION
+
mfspr r10, SPRN_SRR0 /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
@@ -624,25 +631,25 @@
bl load_up_spe
b fast_exception_return
1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x2010, KernelSPE)
+ EXC_XFER_LITE(0x2010, KernelSPE)
#elif defined(CONFIG_SPE_POSSIBLE)
EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
#endif /* CONFIG_SPE_POSSIBLE */
/* SPE Floating Point Data */
#ifdef CONFIG_SPE
EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData,
- SPEFloatingPointException, EXC_XFER_EE)
+ SPEFloatingPointException, EXC_XFER_STD)
/* SPE Floating Point Round */
EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
- SPEFloatingPointRoundException, EXC_XFER_EE)
+ SPEFloatingPointRoundException, EXC_XFER_STD)
#elif defined(CONFIG_SPE_POSSIBLE)
EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData,
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
#endif /* CONFIG_SPE_POSSIBLE */
@@ -665,10 +672,10 @@
unknown_exception)
/* Hypercall */
- EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD)
/* Embedded Hypervisor Privilege */
- EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD)
interrupt_end:
@@ -702,8 +709,7 @@
/* Get the next_tlbcam_idx percpu var */
#ifdef CONFIG_SMP
- lwz r12, THREAD_INFO-THREAD(r12)
- lwz r15, TI_CPU(r12)
+ lwz r15, TASK_CPU-THREAD(r12)
lis r14, __per_cpu_offset@h
ori r14, r14, __per_cpu_offset@l
rlwinm r15, r15, 2, 0, 29
@@ -1074,10 +1080,10 @@
mr r4,r24 /* Why? */
bl call_setup_cpu
- /* get current_thread_info and current */
- lis r1,secondary_ti@ha
- lwz r1,secondary_ti@l(r1)
- lwz r2,TI_TASK(r1)
+ /* get current's stack and current */
+ lis r2,secondary_current@ha
+ lwz r2,secondary_current@l(r2)
+ lwz r1,TASK_STACK(r2)
/* stack */
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index fec8a67..1007ec3 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -1,25 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
* using the CPU's debug registers. Derived from
* "arch/x86/kernel/hw_breakpoint.c"
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright 2010 IBM Corporation
* Author: K.Prasad <prasad@linux.vnet.ibm.com>
- *
*/
#include <linux/hw_breakpoint.h>
@@ -29,11 +15,15 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/sstep.h>
#include <asm/debug.h>
+#include <asm/debugfs.h>
+#include <asm/hvcall.h>
#include <linux/uaccess.h>
/*
@@ -174,7 +164,7 @@
if (!ppc_breakpoint_available())
return -ENODEV;
length_max = 8; /* DABR */
- if (cpu_has_feature(CPU_FTR_DAWR)) {
+ if (dawr_enabled()) {
length_max = 512 ; /* 64 doublewords */
/* DAWR region can't cross 512 boundary */
if ((attr->bp_addr >> 9) !=
@@ -205,18 +195,63 @@
tsk->thread.last_hit_ubp = NULL;
}
+static bool is_larx_stcx_instr(struct pt_regs *regs, unsigned int instr)
+{
+ int ret, type;
+ struct instruction_op op;
+
+ ret = analyse_instr(&op, regs, instr);
+ type = GETTYPE(op.type);
+ return (!ret && (type == LARX || type == STCX));
+}
+
/*
* Handle debug exception notifications.
*/
+static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
+ unsigned long addr)
+{
+ unsigned int instr = 0;
+
+ if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
+ goto fail;
+
+ if (is_larx_stcx_instr(regs, instr)) {
+ printk_ratelimited("Breakpoint hit on instruction that can't be emulated."
+ " Breakpoint at 0x%lx will be disabled.\n", addr);
+ goto disable;
+ }
+
+ /* Do not emulate user-space instructions, instead single-step them */
+ if (user_mode(regs)) {
+ current->thread.last_hit_ubp = bp;
+ regs->msr |= MSR_SE;
+ return false;
+ }
+
+ if (!emulate_step(regs, instr))
+ goto fail;
+
+ return true;
+
+fail:
+ /*
+ * We've failed in reliably handling the hw-breakpoint. Unregister
+ * it and throw a warning message to let the user know about it.
+ */
+ WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
+ "0x%lx will be disabled.", addr);
+
+disable:
+ perf_event_disable_inatomic(bp);
+ return false;
+}
+
int hw_breakpoint_handler(struct die_args *args)
{
int rc = NOTIFY_STOP;
struct perf_event *bp;
struct pt_regs *regs = args->regs;
-#ifndef CONFIG_PPC_8xx
- int stepped = 1;
- unsigned int instr;
-#endif
struct arch_hw_breakpoint *info;
unsigned long dar = regs->dar;
@@ -261,31 +296,9 @@
(dar - bp->attr.bp_addr < bp->attr.bp_len)))
info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
-#ifndef CONFIG_PPC_8xx
- /* Do not emulate user-space instructions, instead single-step them */
- if (user_mode(regs)) {
- current->thread.last_hit_ubp = bp;
- regs->msr |= MSR_SE;
+ if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info->address))
goto out;
- }
- stepped = 0;
- instr = 0;
- if (!__get_user_inatomic(instr, (unsigned int *) regs->nip))
- stepped = emulate_step(regs, instr);
-
- /*
- * emulate_step() could not execute it. We've failed in reliably
- * handling the hw-breakpoint. Unregister it and throw a warning
- * message to let the user know about it.
- */
- if (!stepped) {
- WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
- "0x%lx will be disabled.", info->address);
- perf_event_disable_inatomic(bp);
- goto out;
- }
-#endif
/*
* As a policy, the callback is invoked in a 'trigger-after-execute'
* fashion
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index d7216c9..a36fd05 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Idle daemon for PowerPC. Idle daemon will handle any action
* that needs to be taken when the system becomes idle.
@@ -12,11 +13,6 @@
* Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
*
* 32-bit and 64-bit versions merged by Paul Mackerras <paulus@samba.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/sched.h>
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index ff026c9..0ffdd18 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains the power_save function for 6xx & 7xxx CPUs
* rewritten in assembler
@@ -6,11 +7,6 @@
* it will have PLL 1 set to low speed mode (used during NAP/DOZE).
* if this is not the case some additional changes will have to
* be done to check a runtime var (a bit like powersave-nap)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/threads.h>
@@ -136,10 +132,9 @@
DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- CURRENT_THREAD_INFO(r9, r1)
- lwz r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
+ lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */
ori r8,r8,_TLF_NAPPING /* so when we take an exception */
- stw r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
+ stw r8,TI_LOCAL_FLAGS(r2) /* it will return to our caller */
mfmsr r7
ori r7,r7,MSR_EE
oris r7,r7,MSR_POW@h
@@ -159,8 +154,7 @@
stw r9,_NIP(r11) /* make it do a blr */
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r12, r11)
- lwz r11,TI_CPU(r12) /* get cpu number * 4 */
+ lwz r11,TASK_CPU(r2) /* get cpu number * 4 */
slwi r11,r11,2
#else
li r11,0
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index 4e0d94d..cc008de 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org>
*
* Generic idle routine for Book3E processors
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/threads.h>
@@ -63,7 +59,7 @@
1: /* Let's set the _TLF_NAPPING flag so interrupts make us return
* to the right spot
*/
- CURRENT_THREAD_INFO(r11, r1)
+ ld r11, PACACURRENT(r13)
ld r10,TI_LOCAL_FLAGS(r11)
ori r10,r10,_TLF_NAPPING
std r10,TI_LOCAL_FLAGS(r11)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 7f5ac2e..d327519 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -1,956 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This file contains idle entry/exit functions for POWER7,
- * POWER8 and POWER9 CPUs.
+ * Copyright 2018, IBM Corporation.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
+ * This file contains general idle entry/exit functions to save
+ * and restore stack and NVGPRs which allows C code to call idle
+ * states that lose GPRs, and it will return transparently with
+ * SRR1 wakeup reason return value.
+ *
+ * The platform / CPU caller must ensure SPRs and any other non-GPR
+ * state is saved and restored correctly, handle KVM, interrupts, etc.
*/
-#include <linux/threads.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ppc-opcode.h>
-#include <asm/hw_irq.h>
-#include <asm/kvm_book3s_asm.h>
-#include <asm/opal.h>
#include <asm/cpuidle.h>
-#include <asm/exception-64s.h>
-#include <asm/book3s/64/mmu-hash.h>
-#include <asm/mmu.h>
-#include <asm/asm-compat.h>
-#include <asm/feature-fixups.h>
-
-#undef DEBUG
/*
- * Use unused space in the interrupt stack to save and restore
- * registers for winkle support.
- */
-#define _MMCR0 GPR0
-#define _SDR1 GPR3
-#define _PTCR GPR3
-#define _RPR GPR4
-#define _SPURR GPR5
-#define _PURR GPR6
-#define _TSCR GPR7
-#define _DSCR GPR8
-#define _AMOR GPR9
-#define _WORT GPR10
-#define _WORC GPR11
-#define _LPCR GPR12
-
-#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16
-
- .text
-
-/*
- * Used by threads before entering deep idle states. Saves SPRs
- * in interrupt stack frame
- */
-save_sprs_to_stack:
- /*
- * Note all register i.e per-core, per-subcore or per-thread is saved
- * here since any thread in the core might wake up first
- */
-BEGIN_FTR_SECTION
- /*
- * Note - SDR1 is dropped in Power ISA v3. Hence not restoring
- * SDR1 here
- */
- mfspr r3,SPRN_PTCR
- std r3,_PTCR(r1)
- mfspr r3,SPRN_LPCR
- std r3,_LPCR(r1)
-FTR_SECTION_ELSE
- mfspr r3,SPRN_SDR1
- std r3,_SDR1(r1)
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
- mfspr r3,SPRN_RPR
- std r3,_RPR(r1)
- mfspr r3,SPRN_SPURR
- std r3,_SPURR(r1)
- mfspr r3,SPRN_PURR
- std r3,_PURR(r1)
- mfspr r3,SPRN_TSCR
- std r3,_TSCR(r1)
- mfspr r3,SPRN_DSCR
- std r3,_DSCR(r1)
- mfspr r3,SPRN_AMOR
- std r3,_AMOR(r1)
- mfspr r3,SPRN_WORT
- std r3,_WORT(r1)
- mfspr r3,SPRN_WORC
- std r3,_WORC(r1)
-/*
- * On POWER9, there are idle states such as stop4, invoked via cpuidle,
- * that lose hypervisor resources. In such cases, we need to save
- * additional SPRs before entering those idle states so that they can
- * be restored to their older values on wakeup from the idle state.
+ * Desired PSSCR in r3
*
- * On POWER8, the only such deep idle state is winkle which is used
- * only in the context of CPU-Hotplug, where these additional SPRs are
- * reinitiazed to a sane value. Hence there is no need to save/restore
- * these SPRs.
- */
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-
-power9_save_additional_sprs:
- mfspr r3, SPRN_PID
- mfspr r4, SPRN_LDBAR
- std r3, STOP_PID(r13)
- std r4, STOP_LDBAR(r13)
-
- mfspr r3, SPRN_FSCR
- mfspr r4, SPRN_HFSCR
- std r3, STOP_FSCR(r13)
- std r4, STOP_HFSCR(r13)
-
- mfspr r3, SPRN_MMCRA
- mfspr r4, SPRN_MMCR0
- std r3, STOP_MMCRA(r13)
- std r4, _MMCR0(r1)
-
- mfspr r3, SPRN_MMCR1
- mfspr r4, SPRN_MMCR2
- std r3, STOP_MMCR1(r13)
- std r4, STOP_MMCR2(r13)
- blr
-
-power9_restore_additional_sprs:
- ld r3,_LPCR(r1)
- ld r4, STOP_PID(r13)
- mtspr SPRN_LPCR,r3
- mtspr SPRN_PID, r4
-
- ld r3, STOP_LDBAR(r13)
- ld r4, STOP_FSCR(r13)
- mtspr SPRN_LDBAR, r3
- mtspr SPRN_FSCR, r4
-
- ld r3, STOP_HFSCR(r13)
- ld r4, STOP_MMCRA(r13)
- mtspr SPRN_HFSCR, r3
- mtspr SPRN_MMCRA, r4
-
- ld r3, _MMCR0(r1)
- ld r4, STOP_MMCR1(r13)
- mtspr SPRN_MMCR0, r3
- mtspr SPRN_MMCR1, r4
-
- ld r3, STOP_MMCR2(r13)
- ld r4, PACA_SPRG_VDSO(r13)
- mtspr SPRN_MMCR2, r3
- mtspr SPRN_SPRG3, r4
- blr
-
-/*
- * Used by threads when the lock bit of core_idle_state is set.
- * Threads will spin in HMT_LOW until the lock bit is cleared.
- * r14 - pointer to core_idle_state
- * r15 - used to load contents of core_idle_state
- * r9 - used as a temporary variable
- */
-
-core_idle_lock_held:
- HMT_LOW
-3: lwz r15,0(r14)
- andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bne 3b
- HMT_MEDIUM
- lwarx r15,0,r14
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bne- core_idle_lock_held
- blr
-
-/*
- * Pass requested state in r3:
- * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
- * - Requested PSSCR value in POWER9
+ * No state will be lost regardless of wakeup mechanism (interrupt or NIA).
*
- * Address of idle handler to branch to in realmode in r4
+ * An EC=0 type wakeup will return with a value of 0. SRESET wakeup (which can
+ * happen with xscom SRESET and possibly MCE) may clobber volatiles except LR,
+ * and must blr, to return to caller with r3 set according to caller's expected
+ * return code (for Book3S/64 that is SRR1).
*/
-pnv_powersave_common:
- /* Use r3 to pass state nap/sleep/winkle */
- /* NAP is a state loss, we create a regs frame on the
- * stack, fill it up with the state we care about and
- * stick a pointer to it in PACAR1. We really only
- * need to save PC, some CR bits and the NV GPRs,
- * but for now an interrupt frame will do.
- */
- mtctr r4
+_GLOBAL(isa300_idle_stop_noloss)
+ mtspr SPRN_PSSCR,r3
+ PPC_STOP
+ li r3,0
+ blr
- mflr r0
- std r0,16(r1)
- stdu r1,-INT_FRAME_SIZE(r1)
- std r0,_LINK(r1)
- std r0,_NIP(r1)
-
- /* We haven't lost state ... yet */
- li r0,0
- stb r0,PACA_NAPSTATELOST(r13)
-
- /* Continue saving state */
- SAVE_GPR(2, r1)
- SAVE_NVGPRS(r1)
- mfcr r5
- std r5,_CCR(r1)
+/*
+ * Desired PSSCR in r3
+ *
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
+ *
+ * A wakeup without GPR loss may alteratively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
+ *
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
+ */
+_GLOBAL(isa300_idle_stop_mayloss)
+ mtspr SPRN_PSSCR,r3
std r1,PACAR1(r13)
+ mflr r4
+ mfcr r5
+ /* use stack red zone rather than a new frame for saving regs */
+ std r2,-8*0(r1)
+ std r14,-8*1(r1)
+ std r15,-8*2(r1)
+ std r16,-8*3(r1)
+ std r17,-8*4(r1)
+ std r18,-8*5(r1)
+ std r19,-8*6(r1)
+ std r20,-8*7(r1)
+ std r21,-8*8(r1)
+ std r22,-8*9(r1)
+ std r23,-8*10(r1)
+ std r24,-8*11(r1)
+ std r25,-8*12(r1)
+ std r26,-8*13(r1)
+ std r27,-8*14(r1)
+ std r28,-8*15(r1)
+ std r29,-8*16(r1)
+ std r30,-8*17(r1)
+ std r31,-8*18(r1)
+ std r4,-8*19(r1)
+ std r5,-8*20(r1)
+ /* 168 bytes */
+ PPC_STOP
+ b . /* catch bugs */
-BEGIN_FTR_SECTION
+/*
+ * Desired return value in r3
+ *
+ * The idle wakeup SRESET interrupt can call this after calling
+ * to return to the idle sleep function caller with r3 as the return code.
+ *
+ * This must not be used if idle was entered via a _noloss function (use
+ * a simple blr instead).
+ */
+_GLOBAL(idle_return_gpr_loss)
+ ld r1,PACAR1(r13)
+ ld r4,-8*19(r1)
+ ld r5,-8*20(r1)
+ mtlr r4
+ mtcr r5
/*
- * POWER9 does not require real mode to stop, and presently does not
- * set hwthread_state for KVM (threads don't share MMU context), so
- * we can remain in virtual mode for this.
+ * KVM nap requires r2 to be saved, rather than just restoring it
+ * from PACATOC. This could be avoided for that less common case
+ * if KVM saved its r2.
*/
- bctr
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- /*
- * POWER8
- * Go to real mode to do the nap, as required by the architecture.
- * Also, we need to be in real mode before setting hwthread_state,
- * because as soon as we do that, another thread can switch
- * the MMU context to the guest.
- */
- LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
- mtmsrd r7,0
- bctr
+ ld r2,-8*0(r1)
+ ld r14,-8*1(r1)
+ ld r15,-8*2(r1)
+ ld r16,-8*3(r1)
+ ld r17,-8*4(r1)
+ ld r18,-8*5(r1)
+ ld r19,-8*6(r1)
+ ld r20,-8*7(r1)
+ ld r21,-8*8(r1)
+ ld r22,-8*9(r1)
+ ld r23,-8*10(r1)
+ ld r24,-8*11(r1)
+ ld r25,-8*12(r1)
+ ld r26,-8*13(r1)
+ ld r27,-8*14(r1)
+ ld r28,-8*15(r1)
+ ld r29,-8*16(r1)
+ ld r30,-8*17(r1)
+ ld r31,-8*18(r1)
+ blr
/*
* This is the sequence required to execute idle instructions, as
* specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
+ *
+ * The 0(r1) slot is used to save r2 in isa206, so use that here.
*/
#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
/* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r0,0(r1); \
+ std r2,0(r1); \
ptesync; \
- ld r0,0(r1); \
-236: cmpd cr0,r0,r0; \
+ ld r2,0(r1); \
+236: cmpd cr0,r2,r2; \
bne 236b; \
- IDLE_INST;
-
-
- .globl pnv_enter_arch207_idle_mode
-pnv_enter_arch207_idle_mode:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* Tell KVM we're entering idle */
- li r4,KVM_HWTHREAD_IN_IDLE
- /******************************************************/
- /* N O T E W E L L ! ! ! N O T E W E L L */
- /* The following store to HSTATE_HWTHREAD_STATE(r13) */
- /* MUST occur in real mode, i.e. with the MMU off, */
- /* and the MMU must stay off until we clear this flag */
- /* and test HSTATE_HWTHREAD_REQ(r13) in */
- /* pnv_powersave_wakeup in this file. */
- /* The reason is that another thread can switch the */
- /* MMU to a guest context whenever this flag is set */
- /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
- /* that would potentially cause this thread to start */
- /* executing instructions from guest memory in */
- /* hypervisor mode, leading to a host crash or data */
- /* corruption, or worse. */
- /******************************************************/
- stb r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
- stb r3,PACA_THREAD_IDLE_STATE(r13)
- cmpwi cr3,r3,PNV_THREAD_SLEEP
- bge cr3,2f
- IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
- /* No return */
-2:
- /* Sleep or winkle */
- lbz r7,PACA_THREAD_MASK(r13)
- ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
- li r5,0
- beq cr3,3f
- lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h
-3:
-lwarx_loop1:
- lwarx r15,0,r14
-
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bnel- core_idle_lock_held
-
- add r15,r15,r5 /* Add if winkle */
- andc r15,r15,r7 /* Clear thread bit */
-
- andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
+ IDLE_INST; \
+ b . /* catch bugs */
/*
- * If cr0 = 0, then current thread is the last thread of the core entering
- * sleep. Last thread needs to execute the hardware bug workaround code if
- * required by the platform.
- * Make the workaround call unconditionally here. The below branch call is
- * patched out when the idle states are discovered if the platform does not
- * require it.
- */
-.global pnv_fastsleep_workaround_at_entry
-pnv_fastsleep_workaround_at_entry:
- beq fastsleep_workaround_at_entry
-
- stwcx. r15,0,r14
- bne- lwarx_loop1
- isync
-
-common_enter: /* common code for all the threads entering sleep or winkle */
- bgt cr3,enter_winkle
- IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
-
-fastsleep_workaround_at_entry:
- oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- stwcx. r15,0,r14
- bne- lwarx_loop1
- isync
-
- /* Fast sleep workaround */
- li r3,1
- li r4,1
- bl opal_config_cpu_idle_state
-
- /* Unlock */
- xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- lwsync
- stw r15,0(r14)
- b common_enter
-
-enter_winkle:
- bl save_sprs_to_stack
-
- IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
-
-/*
- * r3 - PSSCR value corresponding to the requested stop state.
- */
-power_enter_stop:
-/*
- * Check if we are executing the lite variant with ESL=EC=0
- */
- andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
- clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
- bne .Lhandle_esl_ec_set
- PPC_STOP
- li r3,0 /* Since we didn't lose state, return 0 */
- std r3, PACA_REQ_PSSCR(r13)
-
- /*
- * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
- * it can determine if the wakeup reason is an HMI in
- * CHECK_HMI_INTERRUPT.
- *
- * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup
- * reason, so there is no point setting r12 to SRR1.
- *
- * Further, we clear r12 here, so that we don't accidentally enter the
- * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI.
- */
- li r12, 0
- b pnv_wakeup_noloss
-
-.Lhandle_esl_ec_set:
-BEGIN_FTR_SECTION
- /*
- * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after
- * a state-loss idle. Saving and restoring MMCR0 over idle is a
- * workaround.
- */
- mfspr r4,SPRN_MMCR0
- std r4,_MMCR0(r1)
-END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
-
-/*
- * Check if the requested state is a deep idle state.
- */
- LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
- ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
- cmpd r3,r4
- bge .Lhandle_deep_stop
- PPC_STOP /* Does not return (system reset interrupt) */
-
-.Lhandle_deep_stop:
-/*
- * Entering deep idle state.
- * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
- * stack and enter stop
- */
- lbz r7,PACA_THREAD_MASK(r13)
- ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
-
-lwarx_loop_stop:
- lwarx r15,0,r14
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bnel- core_idle_lock_held
- andc r15,r15,r7 /* Clear thread bit */
-
- stwcx. r15,0,r14
- bne- lwarx_loop_stop
- isync
-
- bl save_sprs_to_stack
-
- PPC_STOP /* Does not return (system reset interrupt) */
-
-/*
- * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
- * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE).
- */
-_GLOBAL(power7_idle_insn)
- /* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode)
- b pnv_powersave_common
-
-#define CHECK_HMI_INTERRUPT \
-BEGIN_FTR_SECTION_NESTED(66); \
- rlwinm r0,r12,45-31,0xf; /* extract wake reason field (P8) */ \
-FTR_SECTION_ELSE_NESTED(66); \
- rlwinm r0,r12,45-31,0xe; /* P7 wake reason field is 3 bits */ \
-ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
- cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
- bne+ 20f; \
- /* Invoke opal call to handle hmi */ \
- ld r2,PACATOC(r13); \
- ld r1,PACAR1(r13); \
- std r3,ORIG_GPR3(r1); /* Save original r3 */ \
- li r3,0; /* NULL argument */ \
- bl hmi_exception_realmode; \
- nop; \
- ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
-20: nop;
-
-/*
- * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
- * r3 contains desired PSSCR register value.
+ * Desired instruction type in r3
*
- * Offline (CPU unplug) case also must notify KVM that the CPU is
- * idle.
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
+ *
+ * A wakeup without GPR loss may alteratively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
+ *
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
+ *
+ * This must be called in real-mode (MSR_IDLE).
*/
-_GLOBAL(power9_offline_stop)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /*
- * Tell KVM we're entering idle.
- * This does not have to be done in real mode because the P9 MMU
- * is independent per-thread. Some steppings share radix/hash mode
- * between threads, but in that case KVM has a barrier sync in real
- * mode before and after switching between radix and hash.
- */
- li r4,KVM_HWTHREAD_IN_IDLE
- stb r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
- /* fall through */
-
-_GLOBAL(power9_idle_stop)
- std r3, PACA_REQ_PSSCR(r13)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-BEGIN_FTR_SECTION
- sync
- lwz r5, PACA_DONT_STOP(r13)
- cmpwi r5, 0
+_GLOBAL(isa206_idle_insn_mayloss)
+ std r1,PACAR1(r13)
+ mflr r4
+ mfcr r5
+ /* use stack red zone rather than a new frame for saving regs */
+ std r2,-8*0(r1)
+ std r14,-8*1(r1)
+ std r15,-8*2(r1)
+ std r16,-8*3(r1)
+ std r17,-8*4(r1)
+ std r18,-8*5(r1)
+ std r19,-8*6(r1)
+ std r20,-8*7(r1)
+ std r21,-8*8(r1)
+ std r22,-8*9(r1)
+ std r23,-8*10(r1)
+ std r24,-8*11(r1)
+ std r25,-8*12(r1)
+ std r26,-8*13(r1)
+ std r27,-8*14(r1)
+ std r28,-8*15(r1)
+ std r29,-8*16(r1)
+ std r30,-8*17(r1)
+ std r31,-8*18(r1)
+ std r4,-8*19(r1)
+ std r5,-8*20(r1)
+ cmpwi r3,PNV_THREAD_NAP
bne 1f
-END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
-#endif
- mtspr SPRN_PSSCR,r3
- LOAD_REG_ADDR(r4,power_enter_stop)
- b pnv_powersave_common
- /* No return */
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-1:
- /*
- * We get here when TM / thread reconfiguration bug workaround
- * code wants to get the CPU into SMT4 mode, and therefore
- * we are being asked not to stop.
- */
- li r3, 0
- std r3, PACA_REQ_PSSCR(r13)
- blr /* return 0 for wakeup cause / SRR1 value */
-#endif
-
-/*
- * Called from machine check handler for powersave wakeups.
- * Low level machine check processing has already been done. Now just
- * go through the wake up path to get everything in order.
- *
- * r3 - The original SRR1 value.
- * Original SRR[01] have been clobbered.
- * MSR_RI is clear.
- */
-.global pnv_powersave_wakeup_mce
-pnv_powersave_wakeup_mce:
- /* Set cr3 for pnv_powersave_wakeup */
- rlwinm r11,r3,47-31,30,31
- cmpwi cr3,r11,2
-
- /*
- * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
- * reason into r12, which allows reuse of the system reset wakeup
- * code without being mistaken for another type of wakeup.
- */
- oris r12,r3,SRR1_WAKEMCE_RESVD@h
-
- b pnv_powersave_wakeup
-
-/*
- * Called from reset vector for powersave wakeups.
- * cr3 - set to gt if waking up with partial/complete hypervisor state loss
- * r12 - SRR1
- */
-.global pnv_powersave_wakeup
-pnv_powersave_wakeup:
- ld r2, PACATOC(r13)
-
-BEGIN_FTR_SECTION
- bl pnv_restore_hyp_resource_arch300
-FTR_SECTION_ELSE
- bl pnv_restore_hyp_resource_arch207
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
-
- li r0,PNV_THREAD_RUNNING
- stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
-
- mr r3,r12
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- lbz r0,HSTATE_HWTHREAD_STATE(r13)
- cmpwi r0,KVM_HWTHREAD_IN_KERNEL
- beq 0f
- li r0,KVM_HWTHREAD_IN_KERNEL
- stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* Order setting hwthread_state vs. testing hwthread_req */
- sync
-0: lbz r0,HSTATE_HWTHREAD_REQ(r13)
- cmpwi r0,0
- beq 1f
- b kvm_start_guest
-1:
-#endif
-
- /* Return SRR1 from power7_nap() */
- blt cr3,pnv_wakeup_noloss
- b pnv_wakeup_loss
-
-/*
- * Check whether we have woken up with hypervisor state loss.
- * If yes, restore hypervisor state and return back to link.
- *
- * cr3 - set to gt if waking up with partial/complete hypervisor state loss
- */
-pnv_restore_hyp_resource_arch300:
- /*
- * Workaround for POWER9, if we lost resources, the ERAT
- * might have been mixed up and needs flushing. We also need
- * to reload MMCR0 (see comment above). We also need to set
- * then clear bit 60 in MMCRA to ensure the PMU starts running.
- */
- blt cr3,1f
-BEGIN_FTR_SECTION
- PPC_INVALIDATE_ERAT
- ld r1,PACAR1(r13)
- ld r4,_MMCR0(r1)
- mtspr SPRN_MMCR0,r4
-END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
- mfspr r4,SPRN_MMCRA
- ori r4,r4,(1 << (63-60))
- mtspr SPRN_MMCRA,r4
- xori r4,r4,(1 << (63-60))
- mtspr SPRN_MMCRA,r4
-1:
- /*
- * POWER ISA 3. Use PSSCR to determine if we
- * are waking up from deep idle state
- */
- LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
- ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
-
- /*
- * 0-3 bits correspond to Power-Saving Level Status
- * which indicates the idle state we are waking up from
- */
- mfspr r5, SPRN_PSSCR
- rldicl r5,r5,4,60
- li r0, 0 /* clear requested_psscr to say we're awake */
- std r0, PACA_REQ_PSSCR(r13)
- cmpd cr4,r5,r4
- bge cr4,pnv_wakeup_tb_loss /* returns to caller */
-
- blr /* Waking up without hypervisor state loss. */
-
-/* Same calling convention as arch300 */
-pnv_restore_hyp_resource_arch207:
- /*
- * POWER ISA 2.07 or less.
- * Check if we slept with sleep or winkle.
- */
- lbz r4,PACA_THREAD_IDLE_STATE(r13)
- cmpwi cr2,r4,PNV_THREAD_NAP
- bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */
-
- /*
- * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
- * up from nap. At this stage CR3 shouldn't contains 'gt' since that
- * indicates we are waking with hypervisor state loss from nap.
- */
- bgt cr3,.
-
- blr /* Waking up without hypervisor state loss */
-
-/*
- * Called if waking up from idle state which can cause either partial or
- * complete hyp state loss.
- * In POWER8, called if waking up from fastsleep or winkle
- * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state
- *
- * r13 - PACA
- * cr3 - gt if waking up with partial/complete hypervisor state loss
- *
- * If ISA300:
- * cr4 - gt or eq if waking up from complete hypervisor state loss.
- *
- * If ISA207:
- * r4 - PACA_THREAD_IDLE_STATE
- */
-pnv_wakeup_tb_loss:
- ld r1,PACAR1(r13)
- /*
- * Before entering any idle state, the NVGPRs are saved in the stack.
- * If there was a state loss, or PACA_NAPSTATELOST was set, then the
- * NVGPRs are restored. If we are here, it is likely that state is lost,
- * but not guaranteed -- neither ISA207 nor ISA300 tests to reach
- * here are the same as the test to restore NVGPRS:
- * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300,
- * and SRR1 test for restoring NVGPRs.
- *
- * We are about to clobber NVGPRs now, so set NAPSTATELOST to
- * guarantee they will always be restored. This might be tightened
- * with careful reading of specs (particularly for ISA300) but this
- * is already a slow wakeup path and it's simpler to be safe.
- */
- li r0,1
- stb r0,PACA_NAPSTATELOST(r13)
-
- /*
- *
- * Save SRR1 and LR in NVGPRs as they might be clobbered in
- * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
- * to determine the wakeup reason if we branch to kvm_start_guest. LR
- * is required to return back to reset vector after hypervisor state
- * restore is complete.
- */
- mr r19,r12
- mr r18,r4
- mflr r17
-BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-
- ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
- lbz r7,PACA_THREAD_MASK(r13)
-
- /*
- * Take the core lock to synchronize against other threads.
- *
- * Lock bit is set in one of the 2 cases-
- * a. In the sleep/winkle enter path, the last thread is executing
- * fastsleep workaround code.
- * b. In the wake up path, another thread is executing fastsleep
- * workaround undo code or resyncing timebase or restoring context
- * In either case loop until the lock bit is cleared.
- */
-1:
- lwarx r15,0,r14
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bnel- core_idle_lock_held
- oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- stwcx. r15,0,r14
- bne- 1b
- isync
-
- andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
- cmpwi cr2,r9,0
-
- /*
- * At this stage
- * cr2 - eq if first thread to wakeup in core
- * cr3- gt if waking up with partial/complete hypervisor state loss
- * ISA300:
- * cr4 - gt or eq if waking up from complete hypervisor state loss.
- */
-
-BEGIN_FTR_SECTION
- /*
- * Were we in winkle?
- * If yes, check if all threads were in winkle, decrement our
- * winkle count, set all thread winkle bits if all were in winkle.
- * Check if our thread has a winkle bit set, and set cr4 accordingly
- * (to match ISA300, above). Pseudo-code for core idle state
- * transitions for ISA207 is as follows (everything happens atomically
- * due to store conditional and/or lock bit):
- *
- * nap_idle() { }
- * nap_wake() { }
- *
- * sleep_idle()
- * {
- * core_idle_state &= ~thread_in_core
- * }
- *
- * sleep_wake()
- * {
- * bool first_in_core, first_in_subcore;
- *
- * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
- * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
- *
- * core_idle_state |= thread_in_core;
- * }
- *
- * winkle_idle()
- * {
- * core_idle_state &= ~thread_in_core;
- * core_idle_state += 1 << WINKLE_COUNT_SHIFT;
- * }
- *
- * winkle_wake()
- * {
- * bool first_in_core, first_in_subcore, winkle_state_lost;
- *
- * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
- * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
- *
- * core_idle_state |= thread_in_core;
- *
- * if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT))
- * core_idle_state |= THREAD_WINKLE_BITS;
- * core_idle_state -= 1 << WINKLE_COUNT_SHIFT;
- *
- * winkle_state_lost = core_idle_state &
- * (thread_in_core << WINKLE_THREAD_SHIFT);
- * core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT);
- * }
- *
- */
- cmpwi r18,PNV_THREAD_WINKLE
+ IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
+1: cmpwi r3,PNV_THREAD_SLEEP
bne 2f
- andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
- subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
- beq 2f
- ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
-2:
- /* Shift thread bit to winkle mask, then test if this thread is set,
- * and remove it from the winkle bits */
- slwi r8,r7,8
- and r8,r8,r15
- andc r15,r15,r8
- cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
+ IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
+2: IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
- lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
- and r4,r4,r15
- cmpwi r4,0 /* Check if first in subcore */
-
- or r15,r15,r7 /* Set thread bit */
- beq first_thread_in_subcore
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-
- or r15,r15,r7 /* Set thread bit */
- beq cr2,first_thread_in_core
-
- /* Not first thread in core or subcore to wake up */
- b clear_lock
-
-first_thread_in_subcore:
- /*
- * If waking up from sleep, subcore state is not lost. Hence
- * skip subcore state restore
- */
- blt cr4,subcore_state_restored
-
- /* Restore per-subcore state */
- ld r4,_SDR1(r1)
- mtspr SPRN_SDR1,r4
-
- ld r4,_RPR(r1)
- mtspr SPRN_RPR,r4
- ld r4,_AMOR(r1)
- mtspr SPRN_AMOR,r4
-
-subcore_state_restored:
- /*
- * Check if the thread is also the first thread in the core. If not,
- * skip to clear_lock.
- */
- bne cr2,clear_lock
-
-first_thread_in_core:
-
- /*
- * First thread in the core waking up from any state which can cause
- * partial or complete hypervisor state loss. It needs to
- * call the fastsleep workaround code if the platform requires it.
- * Call it unconditionally here. The below branch instruction will
- * be patched out if the platform does not have fastsleep or does not
- * require the workaround. Patching will be performed during the
- * discovery of idle-states.
- */
-.global pnv_fastsleep_workaround_at_exit
-pnv_fastsleep_workaround_at_exit:
- b fastsleep_workaround_at_exit
-
-timebase_resync:
- /*
- * Use cr3 which indicates that we are waking up with atleast partial
- * hypervisor state loss to determine if TIMEBASE RESYNC is needed.
- */
- ble cr3,.Ltb_resynced
- /* Time base re-sync */
- bl opal_resync_timebase;
- /*
- * If waking up from sleep (POWER8), per core state
- * is not lost, skip to clear_lock.
- */
-.Ltb_resynced:
- blt cr4,clear_lock
-
- /*
- * First thread in the core to wake up and its waking up with
- * complete hypervisor state loss. Restore per core hypervisor
- * state.
- */
-BEGIN_FTR_SECTION
- ld r4,_PTCR(r1)
- mtspr SPRN_PTCR,r4
- ld r4,_RPR(r1)
- mtspr SPRN_RPR,r4
- ld r4,_AMOR(r1)
- mtspr SPRN_AMOR,r4
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
- ld r4,_TSCR(r1)
- mtspr SPRN_TSCR,r4
- ld r4,_WORC(r1)
- mtspr SPRN_WORC,r4
-
-clear_lock:
- xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- lwsync
- stw r15,0(r14)
-
-common_exit:
- /*
- * Common to all threads.
- *
- * If waking up from sleep, hypervisor state is not lost. Hence
- * skip hypervisor state restore.
- */
- blt cr4,hypervisor_state_restored
-
- /* Waking up from winkle */
-
-BEGIN_MMU_FTR_SECTION
- b no_segments
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
- /* Restore SLB from PACA */
- ld r8,PACA_SLBSHADOWPTR(r13)
-
- .rept SLB_NUM_BOLTED
- li r3, SLBSHADOW_SAVEAREA
- LDX_BE r5, r8, r3
- addi r3, r3, 8
- LDX_BE r6, r8, r3
- andis. r7,r5,SLB_ESID_V@h
- beq 1f
- slbmte r6,r5
-1: addi r8,r8,16
- .endr
-no_segments:
-
- /* Restore per thread state */
-
- ld r4,_SPURR(r1)
- mtspr SPRN_SPURR,r4
- ld r4,_PURR(r1)
- mtspr SPRN_PURR,r4
- ld r4,_DSCR(r1)
- mtspr SPRN_DSCR,r4
- ld r4,_WORT(r1)
- mtspr SPRN_WORT,r4
-
- /* Call cur_cpu_spec->cpu_restore() */
- LOAD_REG_ADDR(r4, cur_cpu_spec)
- ld r4,0(r4)
- ld r12,CPU_SPEC_RESTORE(r4)
-#ifdef PPC64_ELF_ABI_v1
- ld r12,0(r12)
-#endif
- mtctr r12
- bctrl
-
-/*
- * On POWER9, we can come here on wakeup from a cpuidle stop state.
- * Hence restore the additional SPRs to the saved value.
- *
- * On POWER8, we come here only on winkle. Since winkle is used
- * only in the case of CPU-Hotplug, we don't need to restore
- * the additional SPRs.
- */
-BEGIN_FTR_SECTION
- bl power9_restore_additional_sprs
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-hypervisor_state_restored:
-
- mr r12,r19
- mtlr r17
- blr /* return to pnv_powersave_wakeup */
-
-fastsleep_workaround_at_exit:
- li r3,1
- li r4,0
- bl opal_config_cpu_idle_state
- b timebase_resync
-
-/*
- * R3 here contains the value that will be returned to the caller
- * of power7_nap.
- * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
- */
-.global pnv_wakeup_loss
-pnv_wakeup_loss:
- ld r1,PACAR1(r13)
-BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- REST_NVGPRS(r1)
- REST_GPR(2, r1)
- ld r4,PACAKMSR(r13)
- ld r5,_LINK(r1)
- ld r6,_CCR(r1)
- addi r1,r1,INT_FRAME_SIZE
- mtlr r5
- mtcr r6
- mtmsrd r4
- blr
-
-/*
- * R3 here contains the value that will be returned to the caller
- * of power7_nap.
- * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
- */
-pnv_wakeup_noloss:
- lbz r0,PACA_NAPSTATELOST(r13)
- cmpwi r0,0
- bne pnv_wakeup_loss
- ld r1,PACAR1(r13)
-BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- ld r4,PACAKMSR(r13)
- ld r5,_NIP(r1)
- ld r6,_CCR(r1)
- addi r1,r1,INT_FRAME_SIZE
- mtlr r5
- mtcr r6
- mtmsrd r4
- blr
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
index 583e55a..308f499 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
* Dave Liu <daveliu@freescale.com>
* copy from idle_6xx.S and modify for e500 based processor,
* implement the power_save function in idle.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/threads.h>
@@ -22,10 +18,9 @@
.text
_GLOBAL(e500_idle)
- CURRENT_THREAD_INFO(r3, r1)
- lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */
+ lwz r4,TI_LOCAL_FLAGS(r2) /* set napping bit */
ori r4,r4,_TLF_NAPPING /* so when we take an exception */
- stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+ stw r4,TI_LOCAL_FLAGS(r2) /* it will return to our caller */
#ifdef CONFIG_PPC_E500MC
wrteei 1
@@ -88,8 +83,7 @@
stw r9,_NIP(r11) /* make it do a blr */
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r12, r1)
- lwz r11,TI_CPU(r12) /* get cpu number * 4 */
+ lwz r11,TASK_CPU(r2) /* get cpu number * 4 */
slwi r11,r11,2
#else
li r11,0
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index a09b3c7..33c6253 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains the power_save function for 970-family CPUs.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/threads.h>
@@ -68,7 +64,7 @@
DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- CURRENT_THREAD_INFO(r9, r1)
+ ld r9, PACA_THREAD_INFO(r13)
ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
ori r8,r8,_TLF_NAPPING /* so when we take an exception */
std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kernel/ima_kexec.c
index 5ea42c9..720e50e 100644
--- a/arch/powerpc/kernel/ima_kexec.c
+++ b/arch/powerpc/kernel/ima_kexec.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2016 IBM Corporation
*
* Authors:
* Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/slab.h>
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index aa9f1b8..0276bc8 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Support PCI IO workaround
*
* Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>
* IBM, Corp.
* (C) Copyright 2007-2008 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#undef DEBUG
@@ -152,11 +149,11 @@
};
#ifdef CONFIG_PPC_INDIRECT_MMIO
-static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
- unsigned long flags, void *caller)
+void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
+ pgprot_t prot, void *caller)
{
struct iowa_bus *bus;
- void __iomem *res = __ioremap_caller(addr, size, flags, caller);
+ void __iomem *res = __ioremap_caller(addr, size, prot, caller);
int busno;
bus = iowa_pci_find(0, (unsigned long)addr);
@@ -166,20 +163,17 @@
}
return res;
}
-#else /* CONFIG_PPC_INDIRECT_MMIO */
-#define iowa_ioremap NULL
#endif /* !CONFIG_PPC_INDIRECT_MMIO */
+bool io_workaround_inited;
+
/* Enable IO workaround */
static void io_workaround_init(void)
{
- static int io_workaround_inited;
-
if (io_workaround_inited)
return;
ppc_pci_io = iowa_pci_io;
- ppc_md.ioremap = iowa_ioremap;
- io_workaround_inited = 1;
+ io_workaround_inited = true;
}
/* Register new bus to support workaround */
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
index 2a2b4ae..2f29b7d 100644
--- a/arch/powerpc/kernel/io.c
+++ b/arch/powerpc/kernel/io.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* I/O string operations
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -10,11 +11,6 @@
* PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
*
* Rewritten in C by Stephen Rothwell.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/types.h>
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 19b4c62..9704f3f 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
*
@@ -6,20 +7,6 @@
* and Ben. Herrenschmidt, IBM Corporation
*
* Dynamic DMA mapping support, bus-independent parts.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
@@ -47,6 +34,7 @@
#include <asm/fadump.h>
#include <asm/vio.h>
#include <asm/tce.h>
+#include <asm/mmu_context.h>
#define DBG(...)
@@ -197,11 +185,11 @@
if (unlikely(npages == 0)) {
if (printk_ratelimit())
WARN_ON(1);
- return IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
if (should_fail_iommu(dev))
- return IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
/*
* We don't need to disable preemption here because any CPU can
@@ -277,7 +265,7 @@
} else {
/* Give up */
spin_unlock_irqrestore(&(pool->lock), flags);
- return IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
}
@@ -309,13 +297,13 @@
unsigned long attrs)
{
unsigned long entry;
- dma_addr_t ret = IOMMU_MAPPING_ERROR;
+ dma_addr_t ret = DMA_MAPPING_ERROR;
int build_fail;
entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
- if (unlikely(entry == IOMMU_MAPPING_ERROR))
- return IOMMU_MAPPING_ERROR;
+ if (unlikely(entry == DMA_MAPPING_ERROR))
+ return DMA_MAPPING_ERROR;
entry += tbl->it_offset; /* Offset into real TCE table */
ret = entry << tbl->it_page_shift; /* Set the return dma address */
@@ -327,12 +315,12 @@
/* tbl->it_ops->set() only returns non-zero for transient errors.
* Clean up the table bitmap in this case and return
- * IOMMU_MAPPING_ERROR. For all other errors the functionality is
+ * DMA_MAPPING_ERROR. For all other errors the functionality is
* not altered.
*/
if (unlikely(build_fail)) {
__iommu_free(tbl, ret, npages);
- return IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
/* Flush/invalidate TLB caches if necessary */
@@ -477,7 +465,7 @@
DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
/* Handle failure */
- if (unlikely(entry == IOMMU_MAPPING_ERROR)) {
+ if (unlikely(entry == DMA_MAPPING_ERROR)) {
if (!(attrs & DMA_ATTR_NO_WARN) &&
printk_ratelimit())
dev_info(dev, "iommu_alloc failed, tbl %p "
@@ -544,7 +532,7 @@
*/
if (outcount < incount) {
outs = sg_next(outs);
- outs->dma_address = IOMMU_MAPPING_ERROR;
+ outs->dma_address = DMA_MAPPING_ERROR;
outs->dma_length = 0;
}
@@ -562,7 +550,7 @@
npages = iommu_num_pages(s->dma_address, s->dma_length,
IOMMU_PAGE_SIZE(tbl));
__iommu_free(tbl, vaddr, npages);
- s->dma_address = IOMMU_MAPPING_ERROR;
+ s->dma_address = DMA_MAPPING_ERROR;
s->dma_length = 0;
}
if (s == outs)
@@ -645,11 +633,54 @@
#endif
}
+static void iommu_table_reserve_pages(struct iommu_table *tbl,
+ unsigned long res_start, unsigned long res_end)
+{
+ int i;
+
+ WARN_ON_ONCE(res_end < res_start);
+ /*
+ * Reserve page 0 so it will not be used for any mappings.
+ * This avoids buggy drivers that consider page 0 to be invalid
+ * to crash the machine or even lose data.
+ */
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+
+ tbl->it_reserved_start = res_start;
+ tbl->it_reserved_end = res_end;
+
+ /* Check if res_start..res_end isn't empty and overlaps the table */
+ if (res_start && res_end &&
+ (tbl->it_offset + tbl->it_size < res_start ||
+ res_end < tbl->it_offset))
+ return;
+
+ for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+ set_bit(i - tbl->it_offset, tbl->it_map);
+}
+
+static void iommu_table_release_pages(struct iommu_table *tbl)
+{
+ int i;
+
+ /*
+ * In case we have reserved the first bit, we should not emit
+ * the warning below.
+ */
+ if (tbl->it_offset == 0)
+ clear_bit(0, tbl->it_map);
+
+ for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+ clear_bit(i - tbl->it_offset, tbl->it_map);
+}
+
/*
* Build a iommu_table structure. This contains a bit map which
* is used to manage allocation of the tce space.
*/
-struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
+ unsigned long res_start, unsigned long res_end)
{
unsigned long sz;
static int welcomed = 0;
@@ -668,13 +699,7 @@
tbl->it_map = page_address(page);
memset(tbl->it_map, 0, sz);
- /*
- * Reserve page 0 so it will not be used for any mappings.
- * This avoids buggy drivers that consider page 0 to be invalid
- * to crash the machine or even lose data.
- */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ iommu_table_reserve_pages(tbl, res_start, res_end);
/* We only split the IOMMU table if we have 1GB or more of space */
if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
@@ -726,12 +751,7 @@
return;
}
- /*
- * In case we have reserved the first bit, we should not emit
- * the warning below.
- */
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ iommu_table_release_pages(tbl);
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
@@ -776,7 +796,7 @@
unsigned long mask, enum dma_data_direction direction,
unsigned long attrs)
{
- dma_addr_t dma_handle = IOMMU_MAPPING_ERROR;
+ dma_addr_t dma_handle = DMA_MAPPING_ERROR;
void *vaddr;
unsigned long uaddr;
unsigned int npages, align;
@@ -785,9 +805,9 @@
vaddr = page_address(page) + offset;
uaddr = (unsigned long)vaddr;
- npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
if (tbl) {
+ npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
align = 0;
if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
((unsigned long)vaddr & ~PAGE_MASK) == 0)
@@ -796,7 +816,7 @@
dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
mask >> tbl->it_page_shift, align,
attrs);
- if (dma_handle == IOMMU_MAPPING_ERROR) {
+ if (dma_handle == DMA_MAPPING_ERROR) {
if (!(attrs & DMA_ATTR_NO_WARN) &&
printk_ratelimit()) {
dev_info(dev, "iommu_alloc failed, tbl %p "
@@ -868,7 +888,7 @@
io_order = get_iommu_order(size, tbl);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
mask >> tbl->it_page_shift, io_order, 0);
- if (mapping == IOMMU_MAPPING_ERROR) {
+ if (mapping == DMA_MAPPING_ERROR) {
free_pages((unsigned long)ret, order);
return NULL;
}
@@ -993,25 +1013,32 @@
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
-long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
- unsigned long *hpa, enum dma_data_direction *direction)
+extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+ struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction)
{
long ret;
+ unsigned long size = 0;
- ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
-
+ ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
- (*direction == DMA_BIDIRECTIONAL)))
+ (*direction == DMA_BIDIRECTIONAL)) &&
+ !mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
+ &size))
SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
- /* if (unlikely(ret))
- pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
- __func__, hwaddr, entry << tbl->it_page_shift,
- hwaddr, ret); */
-
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_tce_xchg);
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);
+
+void iommu_tce_kill(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages)
+{
+ if (tbl->it_ops->tce_kill)
+ tbl->it_ops->tce_kill(tbl, entry, pages, false);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_kill);
int iommu_take_ownership(struct iommu_table *tbl)
{
@@ -1025,22 +1052,21 @@
* requires exchange() callback defined so if it is not
* implemented, we disallow taking ownership over the table.
*/
- if (!tbl->it_ops->exchange)
+ if (!tbl->it_ops->xchg_no_kill)
return -EINVAL;
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
spin_lock(&tbl->pools[i].lock);
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ iommu_table_release_pages(tbl);
if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
pr_err("iommu_tce: it_map is not empty");
ret = -EBUSY;
- /* Restore bit#0 set by iommu_init_table() */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ /* Undo iommu_table_release_pages, i.e. restore bit#0, etc */
+ iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+ tbl->it_reserved_end);
} else {
memset(tbl->it_map, 0xff, sz);
}
@@ -1063,9 +1089,8 @@
memset(tbl->it_map, 0, sz);
- /* Restore bit#0 set by iommu_init_table() */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+ tbl->it_reserved_end);
for (i = 0; i < tbl->nr_pools; i++)
spin_unlock(&tbl->pools[i].lock);
@@ -1073,11 +1098,8 @@
}
EXPORT_SYMBOL_GPL(iommu_release_ownership);
-int iommu_add_device(struct device *dev)
+int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
{
- struct iommu_table *tbl;
- struct iommu_table_group_link *tgl;
-
/*
* The sysfs entries should be populated before
* binding IOMMU group. If sysfs entries isn't
@@ -1086,39 +1108,17 @@
if (!device_is_registered(dev))
return -ENOENT;
- if (dev->iommu_group) {
+ if (device_iommu_mapped(dev)) {
pr_debug("%s: Skipping device %s with iommu group %d\n",
__func__, dev_name(dev),
iommu_group_id(dev->iommu_group));
return -EBUSY;
}
- tbl = get_iommu_table_base(dev);
- if (!tbl) {
- pr_debug("%s: Skipping device %s with no tbl\n",
- __func__, dev_name(dev));
- return 0;
- }
-
- tgl = list_first_entry_or_null(&tbl->it_group_list,
- struct iommu_table_group_link, next);
- if (!tgl) {
- pr_debug("%s: Skipping device %s with no group\n",
- __func__, dev_name(dev));
- return 0;
- }
pr_debug("%s: Adding %s to iommu group %d\n",
- __func__, dev_name(dev),
- iommu_group_id(tgl->table_group->group));
+ __func__, dev_name(dev), iommu_group_id(table_group->group));
- if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
- pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
- __func__, IOMMU_PAGE_SIZE(tbl),
- PAGE_SIZE, dev_name(dev));
- return -EINVAL;
- }
-
- return iommu_group_add_device(tgl->table_group->group, dev);
+ return iommu_group_add_device(table_group->group, dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);
@@ -1129,7 +1129,7 @@
* and we needn't detach them from the associated
* IOMMU groups
*/
- if (!dev->iommu_group) {
+ if (!device_iommu_mapped(dev)) {
pr_debug("iommu_tce: skipping device %s with no tbl\n",
dev_name(dev));
return;
@@ -1138,31 +1138,4 @@
iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_del_device);
-
-static int tce_iommu_bus_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
-
- switch (action) {
- case BUS_NOTIFY_ADD_DEVICE:
- return iommu_add_device(dev);
- case BUS_NOTIFY_DEL_DEVICE:
- if (dev->iommu_group)
- iommu_del_device(dev);
- return 0;
- default:
- return 0;
- }
-}
-
-static struct notifier_block tce_iommu_bus_nb = {
- .notifier_call = tce_iommu_bus_notifier,
-};
-
-int __init tce_iommu_bus_notifier_init(void)
-{
- bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
- return 0;
-}
#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 916ddc4..5645bc9 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Derived from arch/i386/kernel/irq.c
* Copyright (C) 1992 Linus Torvalds
@@ -8,11 +9,6 @@
* Adapted for Power Macintosh by Paul Mackerras
* Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* This file contains the code used by various IRQ handling routines:
* asking for different IRQ's should be done through these routines
* instead of just grabbing them. Thus setups with different IRQ numbers
@@ -81,10 +77,7 @@
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
-int __irq_offset_value;
-
#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(__irq_offset_value);
atomic_t ppc_n_lost_interrupts;
#ifdef CONFIG_TAU_INT
@@ -261,16 +254,9 @@
*/
irq_happened = get_irq_happened();
if (!irq_happened) {
- /*
- * FIXME. Here we'd like to be able to do:
- *
- * #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- * WARN_ON(!(mfmsr() & MSR_EE));
- * #endif
- *
- * But currently it hits in a few paths, we should fix those and
- * enable the warning.
- */
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON_ONCE(!(mfmsr() & MSR_EE));
+#endif
return;
}
@@ -282,7 +268,7 @@
*/
if (!(irq_happened & PACA_IRQ_HARD_DIS)) {
#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- WARN_ON(!(mfmsr() & MSR_EE));
+ WARN_ON_ONCE(!(mfmsr() & MSR_EE));
#endif
__hard_irq_disable();
#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
@@ -293,7 +279,7 @@
* warn if we are wrong. Only do that when IRQ tracing
* is enabled as mfmsr() can be costly.
*/
- if (WARN_ON(mfmsr() & MSR_EE))
+ if (WARN_ON_ONCE(mfmsr() & MSR_EE))
__hard_irq_disable();
#endif
}
@@ -618,9 +604,8 @@
sp = current_stack_pointer() & (THREAD_SIZE-1);
/* check for stack overflow: is there less than 2KB free? */
- if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
- pr_err("do_IRQ: stack overflow: %ld\n",
- sp - sizeof(struct thread_info));
+ if (unlikely(sp < 2048)) {
+ pr_err("do_IRQ: stack overflow: %ld\n", sp);
dump_stack();
}
#endif
@@ -660,36 +645,21 @@
void do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- struct thread_info *curtp, *irqtp, *sirqtp;
+ void *cursp, *irqsp, *sirqsp;
/* Switch to the irq stack to handle this */
- curtp = current_thread_info();
- irqtp = hardirq_ctx[raw_smp_processor_id()];
- sirqtp = softirq_ctx[raw_smp_processor_id()];
+ cursp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+ irqsp = hardirq_ctx[raw_smp_processor_id()];
+ sirqsp = softirq_ctx[raw_smp_processor_id()];
/* Already there ? */
- if (unlikely(curtp == irqtp || curtp == sirqtp)) {
+ if (unlikely(cursp == irqsp || cursp == sirqsp)) {
__do_irq(regs);
set_irq_regs(old_regs);
return;
}
-
- /* Prepare the thread_info in the irq stack */
- irqtp->task = curtp->task;
- irqtp->flags = 0;
-
- /* Copy the preempt_count so that the [soft]irq checks work. */
- irqtp->preempt_count = curtp->preempt_count;
-
/* Switch stack and call */
- call_do_irq(regs, irqtp);
-
- /* Restore stack limit */
- irqtp->task = NULL;
-
- /* Copy back updates to the thread_info */
- if (irqtp->flags)
- set_bits(irqtp->flags, &curtp->flags);
+ call_do_irq(regs, irqsp);
set_irq_regs(old_regs);
}
@@ -698,90 +668,20 @@
{
if (ppc_md.init_IRQ)
ppc_md.init_IRQ();
-
- exc_lvl_ctx_init();
-
- irq_ctx_init();
}
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
-struct thread_info *critirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly;
-
-void exc_lvl_ctx_init(void)
-{
- struct thread_info *tp;
- int i, cpu_nr;
-
- for_each_possible_cpu(i) {
-#ifdef CONFIG_PPC64
- cpu_nr = i;
-#else
-#ifdef CONFIG_SMP
- cpu_nr = get_hard_smp_processor_id(i);
-#else
- cpu_nr = 0;
-#endif
+void *critirq_ctx[NR_CPUS] __read_mostly;
+void *dbgirq_ctx[NR_CPUS] __read_mostly;
+void *mcheckirq_ctx[NR_CPUS] __read_mostly;
#endif
- memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
- tp = critirq_ctx[cpu_nr];
- tp->cpu = cpu_nr;
- tp->preempt_count = 0;
-
-#ifdef CONFIG_BOOKE
- memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE);
- tp = dbgirq_ctx[cpu_nr];
- tp->cpu = cpu_nr;
- tp->preempt_count = 0;
-
- memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE);
- tp = mcheckirq_ctx[cpu_nr];
- tp->cpu = cpu_nr;
- tp->preempt_count = HARDIRQ_OFFSET;
-#endif
- }
-}
-#endif
-
-struct thread_info *softirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly;
-
-void irq_ctx_init(void)
-{
- struct thread_info *tp;
- int i;
-
- for_each_possible_cpu(i) {
- memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
- tp = softirq_ctx[i];
- tp->cpu = i;
- klp_init_thread_info(tp);
-
- memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
- tp = hardirq_ctx[i];
- tp->cpu = i;
- klp_init_thread_info(tp);
- }
-}
+void *softirq_ctx[NR_CPUS] __read_mostly;
+void *hardirq_ctx[NR_CPUS] __read_mostly;
void do_softirq_own_stack(void)
{
- struct thread_info *curtp, *irqtp;
-
- curtp = current_thread_info();
- irqtp = softirq_ctx[smp_processor_id()];
- irqtp->task = curtp->task;
- irqtp->flags = 0;
- call_do_softirq(irqtp);
- irqtp->task = NULL;
-
- /* Set any flag that may have been set on the
- * alternate stack
- */
- if (irqtp->flags)
- set_bits(irqtp->flags, &curtp->flags);
+ call_do_softirq(softirq_ctx[smp_processor_id()]);
}
irq_hw_number_t virq_to_hw(unsigned int virq)
@@ -827,11 +727,6 @@
}
#endif
-int arch_early_irq_init(void)
-{
- return 0;
-}
-
#ifdef CONFIG_PPC64
static int __init setup_noirqdistrib(char *str)
{
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 1df6c74..773671b 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Routines for tracking a legacy ISA bridge
*
@@ -6,11 +7,6 @@
* Some bits and pieces moved over from pci_64.c
*
* Copyrigh 2003 Anton Blanchard <anton@au.ibm.com>, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define DEBUG
@@ -110,14 +106,14 @@
size = 0x10000;
__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- size, pgprot_val(pgprot_noncached(__pgprot(0))));
+ size, pgprot_noncached(PAGE_KERNEL));
return;
inval_range:
printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
"mapping 64k\n");
__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- 0x10000, pgprot_val(pgprot_noncached(__pgprot(0))));
+ 0x10000, pgprot_noncached(PAGE_KERNEL));
}
@@ -253,7 +249,7 @@
*/
isa_io_base = ISA_IO_BASE;
__ioremap_at(pbase, (void *)ISA_IO_BASE,
- size, pgprot_val(pgprot_noncached(__pgprot(0))));
+ size, pgprot_noncached(PAGE_KERNEL));
pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}
@@ -327,8 +323,7 @@
/* Check if we have no ISA device, and this happens to be one,
* register it as such if it has an OF device
*/
- if (!isa_bridge_devnode && devnode && devnode->type &&
- !strcmp(devnode->type, "isa"))
+ if (!isa_bridge_devnode && of_node_is_type(devnode, "isa"))
isa_bridge_find_late(pdev, devnode);
return 0;
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index 6472472..ca37702 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -1,17 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2010 Michael Ellerman, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <asm/code-patching.h>
-#ifdef HAVE_JUMP_LABEL
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
@@ -22,4 +17,3 @@
else
patch_instruction(addr, PPC_INST_NOP);
}
-#endif
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
index ba4f18a..3072fd6 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Load ELF vmlinux file for the kexec_file_load syscall.
*
@@ -10,15 +11,6 @@
* Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
* Heavily modified for the kernel by
* Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation (version 2 of the License).
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#define pr_fmt(fmt) "kexec_elf: " fmt
@@ -31,540 +23,6 @@
#include <linux/slab.h>
#include <linux/types.h>
-#define PURGATORY_STACK_SIZE (16 * 1024)
-
-#define elf_addr_to_cpu elf64_to_cpu
-
-#ifndef Elf_Rel
-#define Elf_Rel Elf64_Rel
-#endif /* Elf_Rel */
-
-struct elf_info {
- /*
- * Where the ELF binary contents are kept.
- * Memory managed by the user of the struct.
- */
- const char *buffer;
-
- const struct elfhdr *ehdr;
- const struct elf_phdr *proghdrs;
- struct elf_shdr *sechdrs;
-};
-
-static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
-{
- return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
-}
-
-static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le64_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be64_to_cpu(value);
-
- return value;
-}
-
-static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le16_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be16_to_cpu(value);
-
- return value;
-}
-
-static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le32_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be32_to_cpu(value);
-
- return value;
-}
-
-/**
- * elf_is_ehdr_sane - check that it is safe to use the ELF header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len)
-{
- if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) {
- pr_debug("Bad program header size.\n");
- return false;
- } else if (ehdr->e_shnum > 0 &&
- ehdr->e_shentsize != sizeof(struct elf_shdr)) {
- pr_debug("Bad section header size.\n");
- return false;
- } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
- ehdr->e_version != EV_CURRENT) {
- pr_debug("Unknown ELF version.\n");
- return false;
- }
-
- if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
- size_t phdr_size;
-
- /*
- * e_phnum is at most 65535 so calculating the size of the
- * program header cannot overflow.
- */
- phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
- /* Sanity check the program header table location. */
- if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) {
- pr_debug("Program headers at invalid location.\n");
- return false;
- } else if (ehdr->e_phoff + phdr_size > buf_len) {
- pr_debug("Program headers truncated.\n");
- return false;
- }
- }
-
- if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
- size_t shdr_size;
-
- /*
- * e_shnum is at most 65536 so calculating
- * the size of the section header cannot overflow.
- */
- shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum;
-
- /* Sanity check the section header table location. */
- if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) {
- pr_debug("Section headers at invalid location.\n");
- return false;
- } else if (ehdr->e_shoff + shdr_size > buf_len) {
- pr_debug("Section headers truncated.\n");
- return false;
- }
- }
-
- return true;
-}
-
-static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr)
-{
- struct elfhdr *buf_ehdr;
-
- if (len < sizeof(*buf_ehdr)) {
- pr_debug("Buffer is too small to hold ELF header.\n");
- return -ENOEXEC;
- }
-
- memset(ehdr, 0, sizeof(*ehdr));
- memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident));
- if (!elf_is_elf_file(ehdr)) {
- pr_debug("No ELF header magic.\n");
- return -ENOEXEC;
- }
-
- if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) {
- pr_debug("Not a supported ELF class.\n");
- return -ENOEXEC;
- } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
- ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
- pr_debug("Not a supported ELF data format.\n");
- return -ENOEXEC;
- }
-
- buf_ehdr = (struct elfhdr *) buf;
- if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) {
- pr_debug("Bad ELF header size.\n");
- return -ENOEXEC;
- }
-
- ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type);
- ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine);
- ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version);
- ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry);
- ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff);
- ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff);
- ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags);
- ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize);
- ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum);
- ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize);
- ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum);
- ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx);
-
- return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_is_phdr_sane - check that it is safe to use the program header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len)
-{
-
- if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) {
- pr_debug("ELF segment location wraps around.\n");
- return false;
- } else if (phdr->p_offset + phdr->p_filesz > buf_len) {
- pr_debug("ELF segment not in file.\n");
- return false;
- } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) {
- pr_debug("ELF segment address wraps around.\n");
- return false;
- }
-
- return true;
-}
-
-static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info,
- int idx)
-{
- /* Override the const in proghdrs, we are the ones doing the loading. */
- struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx];
- const char *pbuf;
- struct elf_phdr *buf_phdr;
-
- pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr));
- buf_phdr = (struct elf_phdr *) pbuf;
-
- phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type);
- phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
- phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
- phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
- phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags);
-
- /*
- * The following fields have a type equivalent to Elf_Addr
- * both in 32 bit and 64 bit ELF.
- */
- phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
- phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
- phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align);
-
- return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_phdrs - read the program headers from the buffer
- *
- * This function assumes that the program header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_phdrs(const char *buf, size_t len,
- struct elf_info *elf_info)
-{
- size_t phdr_size, i;
- const struct elfhdr *ehdr = elf_info->ehdr;
-
- /*
- * e_phnum is at most 65535 so calculating the size of the
- * program header cannot overflow.
- */
- phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
- elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL);
- if (!elf_info->proghdrs)
- return -ENOMEM;
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- int ret;
-
- ret = elf_read_phdr(buf, len, elf_info, i);
- if (ret) {
- kfree(elf_info->proghdrs);
- elf_info->proghdrs = NULL;
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_is_shdr_sane - check that it is safe to use the section header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
-{
- bool size_ok;
-
- /* SHT_NULL headers have undefined values, so we can't check them. */
- if (shdr->sh_type == SHT_NULL)
- return true;
-
- /* Now verify sh_entsize */
- switch (shdr->sh_type) {
- case SHT_SYMTAB:
- size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
- break;
- case SHT_RELA:
- size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
- break;
- case SHT_DYNAMIC:
- size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
- break;
- case SHT_REL:
- size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
- break;
- case SHT_NOTE:
- case SHT_PROGBITS:
- case SHT_HASH:
- case SHT_NOBITS:
- default:
- /*
- * This is a section whose entsize requirements
- * I don't care about. If I don't know about
- * the section I can't care about it's entsize
- * requirements.
- */
- size_ok = true;
- break;
- }
-
- if (!size_ok) {
- pr_debug("ELF section with wrong entry size.\n");
- return false;
- } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
- pr_debug("ELF section address wraps around.\n");
- return false;
- }
-
- if (shdr->sh_type != SHT_NOBITS) {
- if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
- pr_debug("ELF section location wraps around.\n");
- return false;
- } else if (shdr->sh_offset + shdr->sh_size > buf_len) {
- pr_debug("ELF section not in file.\n");
- return false;
- }
- }
-
- return true;
-}
-
-static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info,
- int idx)
-{
- struct elf_shdr *shdr = &elf_info->sechdrs[idx];
- const struct elfhdr *ehdr = elf_info->ehdr;
- const char *sbuf;
- struct elf_shdr *buf_shdr;
-
- sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
- buf_shdr = (struct elf_shdr *) sbuf;
-
- shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name);
- shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type);
- shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
- shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
- shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link);
- shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info);
-
- /*
- * The following fields have a type equivalent to Elf_Addr
- * both in 32 bit and 64 bit ELF.
- */
- shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
- shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
- shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
- shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
-
- return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_shdrs - read the section headers from the buffer
- *
- * This function assumes that the section header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_shdrs(const char *buf, size_t len,
- struct elf_info *elf_info)
-{
- size_t shdr_size, i;
-
- /*
- * e_shnum is at most 65536 so calculating
- * the size of the section header cannot overflow.
- */
- shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
-
- elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
- if (!elf_info->sechdrs)
- return -ENOMEM;
-
- for (i = 0; i < elf_info->ehdr->e_shnum; i++) {
- int ret;
-
- ret = elf_read_shdr(buf, len, elf_info, i);
- if (ret) {
- kfree(elf_info->sechdrs);
- elf_info->sechdrs = NULL;
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info
- * @buf: Buffer to read ELF file from.
- * @len: Size of @buf.
- * @ehdr: Pointer to existing struct which will be populated.
- * @elf_info: Pointer to existing struct which will be populated.
- *
- * This function allows reading ELF files with different byte order than
- * the kernel, byte-swapping the fields as needed.
- *
- * Return:
- * On success returns 0, and the caller should call elf_free_info(elf_info) to
- * free the memory allocated for the section and program headers.
- */
-int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr,
- struct elf_info *elf_info)
-{
- int ret;
-
- ret = elf_read_ehdr(buf, len, ehdr);
- if (ret)
- return ret;
-
- elf_info->buffer = buf;
- elf_info->ehdr = ehdr;
- if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
- ret = elf_read_phdrs(buf, len, elf_info);
- if (ret)
- return ret;
- }
- if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
- ret = elf_read_shdrs(buf, len, elf_info);
- if (ret) {
- kfree(elf_info->proghdrs);
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_free_info - free memory allocated by elf_read_from_buffer
- */
-void elf_free_info(struct elf_info *elf_info)
-{
- kfree(elf_info->proghdrs);
- kfree(elf_info->sechdrs);
- memset(elf_info, 0, sizeof(*elf_info));
-}
-/**
- * build_elf_exec_info - read ELF executable and check that we can use it
- */
-static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr,
- struct elf_info *elf_info)
-{
- int i;
- int ret;
-
- ret = elf_read_from_buffer(buf, len, ehdr, elf_info);
- if (ret)
- return ret;
-
- /* Big endian vmlinux has type ET_DYN. */
- if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {
- pr_err("Not an ELF executable.\n");
- goto error;
- } else if (!elf_info->proghdrs) {
- pr_err("No ELF program header.\n");
- goto error;
- }
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- /*
- * Kexec does not support loading interpreters.
- * In addition this check keeps us from attempting
- * to kexec ordinay executables.
- */
- if (elf_info->proghdrs[i].p_type == PT_INTERP) {
- pr_err("Requires an ELF interpreter.\n");
- goto error;
- }
- }
-
- return 0;
-error:
- elf_free_info(elf_info);
- return -ENOEXEC;
-}
-
-static int elf64_probe(const char *buf, unsigned long len)
-{
- struct elfhdr ehdr;
- struct elf_info elf_info;
- int ret;
-
- ret = build_elf_exec_info(buf, len, &ehdr, &elf_info);
- if (ret)
- return ret;
-
- elf_free_info(&elf_info);
-
- return elf_check_arch(&ehdr) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_exec_load - load ELF executable image
- * @lowest_load_addr: On return, will be the address where the first PT_LOAD
- * section will be loaded in memory.
- *
- * Return:
- * 0 on success, negative value on failure.
- */
-static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr,
- struct elf_info *elf_info,
- unsigned long *lowest_load_addr)
-{
- unsigned long base = 0, lowest_addr = UINT_MAX;
- int ret;
- size_t i;
- struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size,
- .top_down = false };
-
- /* Read in the PT_LOAD segments. */
- for (i = 0; i < ehdr->e_phnum; i++) {
- unsigned long load_addr;
- size_t size;
- const struct elf_phdr *phdr;
-
- phdr = &elf_info->proghdrs[i];
- if (phdr->p_type != PT_LOAD)
- continue;
-
- size = phdr->p_filesz;
- if (size > phdr->p_memsz)
- size = phdr->p_memsz;
-
- kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
- kbuf.bufsz = size;
- kbuf.memsz = phdr->p_memsz;
- kbuf.buf_align = phdr->p_align;
- kbuf.buf_min = phdr->p_paddr + base;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- goto out;
- load_addr = kbuf.mem;
-
- if (load_addr < lowest_addr)
- lowest_addr = load_addr;
- }
-
- /* Update entry point to reflect new load address. */
- ehdr->e_entry += base;
-
- *lowest_load_addr = lowest_addr;
- ret = 0;
- out:
- return ret;
-}
-
static void *elf64_load(struct kimage *image, char *kernel_buf,
unsigned long kernel_len, char *initrd,
unsigned long initrd_len, char *cmdline,
@@ -577,17 +35,18 @@
void *fdt;
const void *slave_code;
struct elfhdr ehdr;
- struct elf_info elf_info;
+ struct kexec_elf_info elf_info;
struct kexec_buf kbuf = { .image = image, .buf_min = 0,
.buf_max = ppc64_rma_size };
struct kexec_buf pbuf = { .image = image, .buf_min = 0,
- .buf_max = ppc64_rma_size, .top_down = true };
+ .buf_max = ppc64_rma_size, .top_down = true,
+ .mem = KEXEC_BUF_MEM_UNKNOWN };
- ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
if (ret)
goto out;
- ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr);
+ ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
if (ret)
goto out;
@@ -606,6 +65,7 @@
kbuf.bufsz = kbuf.memsz = initrd_len;
kbuf.buf_align = PAGE_SIZE;
kbuf.top_down = false;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out;
@@ -638,6 +98,7 @@
kbuf.bufsz = kbuf.memsz = fdt_size;
kbuf.buf_align = PAGE_SIZE;
kbuf.top_down = true;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out;
@@ -652,13 +113,13 @@
pr_err("Error setting up the purgatory.\n");
out:
- elf_free_info(&elf_info);
+ kexec_free_elf_info(&elf_info);
/* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
return ret ? ERR_PTR(ret) : fdt;
}
const struct kexec_file_ops kexec_elf64_ops = {
- .probe = elf64_probe,
+ .probe = kexec_elf_probe,
.load = elf64_load,
};
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 35e240a..7dd55eb 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -24,6 +24,7 @@
#include <asm/processor.h>
#include <asm/machdep.h>
#include <asm/debug.h>
+#include <asm/code-patching.h>
#include <linux/slab.h>
/*
@@ -116,14 +117,14 @@
return kgdb_isremovedbreak(regs->nip);
}
-static int kgdb_call_nmi_hook(struct pt_regs *regs)
+static int kgdb_debugger_ipi(struct pt_regs *regs)
{
kgdb_nmicallback(raw_smp_processor_id(), regs);
return 0;
}
#ifdef CONFIG_SMP
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
{
smp_send_debugger_break();
}
@@ -144,47 +145,19 @@
if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0)
return 0;
- if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
+ if (*(u32 *)regs->nip == BREAK_INSTR)
regs->nip += BREAK_INSTR_SIZE;
return 1;
}
-static DEFINE_PER_CPU(struct thread_info, kgdb_thread_info);
static int kgdb_singlestep(struct pt_regs *regs)
{
- struct thread_info *thread_info, *exception_thread_info;
- struct thread_info *backup_current_thread_info =
- this_cpu_ptr(&kgdb_thread_info);
-
if (user_mode(regs))
return 0;
- /*
- * On Book E and perhaps other processors, singlestep is handled on
- * the critical exception stack. This causes current_thread_info()
- * to fail, since it it locates the thread_info by masking off
- * the low bits of the current stack pointer. We work around
- * this issue by copying the thread_info from the kernel stack
- * before calling kgdb_handle_exception, and copying it back
- * afterwards. On most processors the copy is avoided since
- * exception_thread_info == thread_info.
- */
- thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1));
- exception_thread_info = current_thread_info();
-
- if (thread_info != exception_thread_info) {
- /* Save the original current_thread_info. */
- memcpy(backup_current_thread_info, exception_thread_info, sizeof *thread_info);
- memcpy(exception_thread_info, thread_info, sizeof *thread_info);
- }
-
kgdb_handle_exception(0, SIGTRAP, 0, regs);
- if (thread_info != exception_thread_info)
- /* Restore current_thread_info lastly. */
- memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info);
-
return 1;
}
@@ -441,16 +414,42 @@
return -1;
}
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ unsigned int instr;
+ unsigned int *addr = (unsigned int *)bpt->bpt_addr;
+
+ err = probe_kernel_address(addr, instr);
+ if (err)
+ return err;
+
+ err = patch_instruction(addr, BREAK_INSTR);
+ if (err)
+ return -EFAULT;
+
+ *(unsigned int *)bpt->saved_instr = instr;
+
+ return 0;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ unsigned int instr = *(unsigned int *)bpt->saved_instr;
+ unsigned int *addr = (unsigned int *)bpt->bpt_addr;
+
+ err = patch_instruction(addr, instr);
+ if (err)
+ return -EFAULT;
+
+ return 0;
+}
+
/*
* Global data
*/
-struct kgdb_arch arch_kgdb_ops = {
-#ifdef __LITTLE_ENDIAN__
- .gdb_bpt_instr = {0x08, 0x10, 0x82, 0x7d},
-#else
- .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
-#endif
-};
+const struct kgdb_arch arch_kgdb_ops;
static int kgdb_not_implemented(struct pt_regs *regs)
{
@@ -475,7 +474,7 @@
old__debugger_break_match = __debugger_break_match;
old__debugger_fault_handler = __debugger_fault_handler;
- __debugger_ipi = kgdb_call_nmi_hook;
+ __debugger_ipi = kgdb_debugger_ipi;
__debugger = kgdb_debugger;
__debugger_bpt = kgdb_handle_breakpoint;
__debugger_sstep = kgdb_singlestep;
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index e4a49c0..972cb28 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Dynamic Ftrace based Kprobes Optimization
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) Hitachi Ltd., 2012
* Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
* IBM Corporation
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 5c60bb0..2d27ec4 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Kernel Probes (KProbes)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2002, 2004
*
* 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 683b5b3..617eba8 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -1,27 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
* Copyright 2010-2011 Freescale Semiconductor, Inc.
*
* Authors:
* Alexander Graf <agraf@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/kvm_host.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/kmemleak.h>
#include <linux/kvm_para.h>
#include <linux/slab.h>
#include <linux/of.h>
@@ -75,16 +64,17 @@
#define KVM_INST_MTSRIN 0x7c0001e4
static bool kvm_patching_worked = true;
-char kvm_tmp[1024 * 1024];
+extern char kvm_tmp[];
+extern char kvm_tmp_end[];
static int kvm_tmp_index;
-static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
+static void __init kvm_patch_ins(u32 *inst, u32 new_inst)
{
*inst = new_inst;
flush_icache_range((ulong)inst, (ulong)inst + 4);
}
-static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -93,7 +83,7 @@
#endif
}
-static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -102,12 +92,12 @@
#endif
}
-static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff));
}
-static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc));
@@ -116,17 +106,17 @@
#endif
}
-static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc));
}
-static void kvm_patch_ins_nop(u32 *inst)
+static void __init kvm_patch_ins_nop(u32 *inst)
{
kvm_patch_ins(inst, KVM_INST_NOP);
}
-static void kvm_patch_ins_b(u32 *inst, int addr)
+static void __init kvm_patch_ins_b(u32 *inst, int addr)
{
#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S)
/* On relocatable kernels interrupts handlers and our code
@@ -139,11 +129,11 @@
kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
}
-static u32 *kvm_alloc(int len)
+static u32 * __init kvm_alloc(int len)
{
u32 *p;
- if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
+ if ((kvm_tmp_index + len) > (kvm_tmp_end - kvm_tmp)) {
printk(KERN_ERR "KVM: No more space (%d + %d)\n",
kvm_tmp_index, len);
kvm_patching_worked = false;
@@ -162,7 +152,7 @@
extern u32 kvm_emulate_mtmsrd_len;
extern u32 kvm_emulate_mtmsrd[];
-static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
+static void __init kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -215,7 +205,7 @@
extern u32 kvm_emulate_mtmsr_len;
extern u32 kvm_emulate_mtmsr[];
-static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
+static void __init kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -276,7 +266,7 @@
extern u32 kvm_emulate_wrtee_len;
extern u32 kvm_emulate_wrtee[];
-static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
+static void __init kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
{
u32 *p;
int distance_start;
@@ -333,7 +323,7 @@
extern u32 kvm_emulate_wrteei_0_len;
extern u32 kvm_emulate_wrteei_0[];
-static void kvm_patch_ins_wrteei_0(u32 *inst)
+static void __init kvm_patch_ins_wrteei_0(u32 *inst)
{
u32 *p;
int distance_start;
@@ -374,7 +364,7 @@
extern u32 kvm_emulate_mtsrin_len;
extern u32 kvm_emulate_mtsrin[];
-static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
+static void __init kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
{
u32 *p;
int distance_start;
@@ -410,7 +400,7 @@
#endif
-static void kvm_map_magic_page(void *data)
+static void __init kvm_map_magic_page(void *data)
{
u32 *features = data;
@@ -425,7 +415,7 @@
*features = out[0];
}
-static void kvm_check_ins(u32 *inst, u32 features)
+static void __init kvm_check_ins(u32 *inst, u32 features)
{
u32 _inst = *inst;
u32 inst_no_rt = _inst & ~KVM_MASK_RT;
@@ -669,7 +659,7 @@
extern u32 kvm_template_start[];
extern u32 kvm_template_end[];
-static void kvm_use_magic_page(void)
+static void __init kvm_use_magic_page(void)
{
u32 *p;
u32 *start, *end;
@@ -710,19 +700,13 @@
kvm_patching_worked ? "worked" : "failed");
}
-static __init void kvm_free_tmp(void)
-{
- free_reserved_area(&kvm_tmp[kvm_tmp_index],
- &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
-}
-
static int __init kvm_guest_init(void)
{
if (!kvm_para_available())
- goto free_tmp;
+ return 0;
if (!epapr_paravirt_enabled)
- goto free_tmp;
+ return 0;
if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
kvm_use_magic_page();
@@ -732,9 +716,6 @@
powersave_nap = 1;
#endif
-free_tmp:
- kvm_free_tmp();
-
return 0;
}
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
index c005088..7af6f8b 100644
--- a/arch/powerpc/kernel/kvm_emul.S
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2010
* Copyright 2010-2011 Freescale Semiconductor, Inc.
@@ -203,6 +192,8 @@
kvm_emulate_mtmsr_len:
.long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
+#ifdef CONFIG_BOOKE
+
/* also used for wrteei 1 */
.global kvm_emulate_wrtee
kvm_emulate_wrtee:
@@ -296,6 +287,10 @@
kvm_emulate_wrteei_0_len:
.long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
+#endif /* CONFIG_BOOKE */
+
+#ifdef CONFIG_PPC_BOOK3S_32
+
.global kvm_emulate_mtsrin
kvm_emulate_mtsrin:
@@ -345,5 +340,15 @@
kvm_emulate_mtsrin_len:
.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
+#endif /* CONFIG_PPC_BOOK3S_32 */
+
+ .balign 4
+ .global kvm_tmp
+kvm_tmp:
+ .space (64 * 1024)
+
+.global kvm_tmp_end
+kvm_tmp_end:
+
.global kvm_template_end
kvm_template_end:
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 6e7dbb7..2020d25 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
L2CR functions
Copyright © 1997-1998 by PowerLogix R & D, Inc.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
Thur, Dec. 12, 1998.
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 5b9dce1..7cea597 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -192,7 +192,7 @@
/* Add port, irq will be dealt with later. We passed a translated
* IO port value. It will be fixed up later along with the irq
*/
- if (tsi && !strcmp(tsi->type, "tsi-bridge"))
+ if (of_node_is_type(tsi, "tsi-bridge"))
return add_legacy_port(np, -1, UPIO_TSI, addr, addr,
0, legacy_port_flags, 0);
else
@@ -400,8 +400,7 @@
/* Next, fill our array with ISA ports */
for_each_node_by_type(np, "serial") {
struct device_node *isa = of_get_parent(np);
- if (isa && (!strcmp(isa->name, "isa") ||
- !strcmp(isa->name, "lpc"))) {
+ if (of_node_name_eq(isa, "isa") || of_node_name_eq(isa, "lpc")) {
if (of_device_is_available(np)) {
index = add_legacy_isa_port(np, isa);
if (index >= 0 && np == stdout)
@@ -415,11 +414,12 @@
/* Next, try to locate PCI ports */
for (np = NULL; (np = of_find_all_nodes(np));) {
struct device_node *pci, *parent = of_get_parent(np);
- if (parent && !strcmp(parent->name, "isa")) {
+ if (of_node_name_eq(parent, "isa")) {
of_node_put(parent);
continue;
}
- if (strcmp(np->name, "serial") && strcmp(np->type, "serial")) {
+ if (!of_node_name_eq(np, "serial") &&
+ !of_node_is_type(np, "serial")) {
of_node_put(parent);
continue;
}
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 63f5a93..c4ed328 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Code to handle transition of Linux booting another kernel.
*
* Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
* GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
* Copyright (C) 2005 IBM Corporation.
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#include <linux/kexec.h>
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
index affe5dc..bf9f1f9 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kernel/machine_kexec_32.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PPC32 code to handle Linux booting another kernel.
*
* Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
* GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
* Copyright (C) 2005 IBM Corporation.
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#include <linux/kexec.h>
@@ -30,7 +28,6 @@
*/
void default_machine_kexec(struct kimage *image)
{
- extern const unsigned char relocate_new_kernel[];
extern const unsigned int relocate_new_kernel_size;
unsigned long page_list;
unsigned long reboot_code_buffer, reboot_code_buffer_phys;
@@ -58,6 +55,9 @@
reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
printk(KERN_INFO "Bye!\n");
+ if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x))
+ relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start);
+
/* now call it */
rnk = (relocate_new_kernel_t) reboot_code_buffer;
(*rnk)(page_list, reboot_code_buffer_phys, image->start);
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index a0f6f45..04a7cba 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PPC64 code to handle Linux booting another kernel.
*
* Copyright (C) 2004-2005, IBM Corp.
*
* Created by: Milton D Miller II
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
@@ -31,6 +29,8 @@
#include <asm/smp.h>
#include <asm/hw_breakpoint.h>
#include <asm/asm-prototypes.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
int default_machine_kexec_prepare(struct kimage *image)
{
@@ -317,10 +317,8 @@
* We setup preempt_count to avoid using VMX in memcpy.
* XXX: the task struct will likely be invalid once we do the copy!
*/
- kexec_stack.thread_info.task = current_thread_info()->task;
- kexec_stack.thread_info.flags = 0;
- kexec_stack.thread_info.preempt_count = HARDIRQ_OFFSET;
- kexec_stack.thread_info.cpu = current_thread_info()->cpu;
+ current_thread_info()->flags = 0;
+ current_thread_info()->preempt_count = HARDIRQ_OFFSET;
/* We need a static PACA, too; copy this CPU's PACA over and switch to
* it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
@@ -331,6 +329,13 @@
#ifdef CONFIG_PPC_PSERIES
kexec_paca.lppaca_ptr = NULL;
#endif
+
+ if (is_secure_guest() && !(image->preserve_context ||
+ image->type == KEXEC_TYPE_CRASH)) {
+ uv_unshare_all_pages();
+ printk("kexec: Unshared all shared pages.\n");
+ }
+
paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
setup_paca(&kexec_paca);
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
index c77e95e..143c917 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* ppc64 code to implement the kexec_file_load syscall
*
@@ -11,20 +12,10 @@
* Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
* Heavily modified for the kernel by
* Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation (version 2 of the License).
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include <linux/slab.h>
#include <linux/kexec.h>
-#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <asm/ima.h>
@@ -47,59 +38,6 @@
}
/**
- * arch_kexec_walk_mem - call func(data) for each unreserved memory block
- * @kbuf: Context info for the search. Also passed to @func.
- * @func: Function to call for each memory block.
- *
- * This function is used by kexec_add_buffer and kexec_locate_mem_hole
- * to find unreserved memory to load kexec segments into.
- *
- * Return: The memory walk will stop when func returns a non-zero value
- * and that value will be returned. If all free regions are visited without
- * func returning non-zero, then zero will be returned.
- */
-int arch_kexec_walk_mem(struct kexec_buf *kbuf,
- int (*func)(struct resource *, void *))
-{
- int ret = 0;
- u64 i;
- phys_addr_t mstart, mend;
- struct resource res = { };
-
- if (kbuf->top_down) {
- for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0,
- &mstart, &mend, NULL) {
- /*
- * In memblock, end points to the first byte after the
- * range while in kexec, end points to the last byte
- * in the range.
- */
- res.start = mstart;
- res.end = mend - 1;
- ret = func(&res, kbuf);
- if (ret)
- break;
- }
- } else {
- for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend,
- NULL) {
- /*
- * In memblock, end points to the first byte after the
- * range while in kexec, end points to the last byte
- * in the range.
- */
- res.start = mstart;
- res.end = mend - 1;
- ret = func(&res, kbuf);
- if (ret)
- break;
- }
- }
-
- return ret;
-}
-
-/**
* setup_purgatory - initialize the purgatory's global variables
* @image: kexec image.
* @slave_code: Slave code for the purgatory.
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index efdd16a..34c1001 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Machine check exception handling.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright 2013 IBM Corporation
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
@@ -31,6 +18,7 @@
#include <asm/machdep.h>
#include <asm/mce.h>
+#include <asm/nmi.h>
static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
@@ -45,13 +33,18 @@
mce_ue_event_queue);
static void machine_check_process_queued_event(struct irq_work *work);
-void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_check_ue_irq_work(struct irq_work *work);
+static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);
static struct irq_work mce_event_process_work = {
.func = machine_check_process_queued_event,
};
+static struct irq_work mce_ue_event_irq_work = {
+ .func = machine_check_ue_irq_work,
+};
+
DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
static void mce_set_error_info(struct machine_check_event *mce,
@@ -111,6 +104,7 @@
mce->srr1 = regs->msr;
mce->gpr3 = regs->gpr[3];
mce->in_use = 1;
+ mce->cpu = get_paca()->paca_index;
/* Mark it recovered if we have handled it and MSR(RI=1). */
if (handled && (regs->msr & MSR_RI))
@@ -120,6 +114,8 @@
mce->initiator = mce_err->initiator;
mce->severity = mce_err->severity;
+ mce->sync_error = mce_err->sync_error;
+ mce->error_class = mce_err->error_class;
/*
* Populate the mce error_type and type-specific error_type.
@@ -153,6 +149,7 @@
if (phys_addr != ULONG_MAX) {
mce->u.ue_error.physical_address_provided = true;
mce->u.ue_error.physical_address = phys_addr;
+ mce->u.ue_error.ignore_event = mce_err->ignore_event;
machine_check_ue_event(mce);
}
}
@@ -208,11 +205,15 @@
get_mce_event(NULL, true);
}
+static void machine_check_ue_irq_work(struct irq_work *work)
+{
+ schedule_work(&mce_ue_event_work);
+}
/*
* Queue up the MCE event which then can be handled later.
*/
-void machine_check_ue_event(struct machine_check_event *evt)
+static void machine_check_ue_event(struct machine_check_event *evt)
{
int index;
@@ -225,7 +226,7 @@
memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
/* Queue work to process this event later. */
- schedule_work(&mce_ue_event_work);
+ irq_work_queue(&mce_ue_event_irq_work);
}
/*
@@ -266,8 +267,17 @@
/*
* This should probably queued elsewhere, but
* oh! well
+ *
+ * Don't report this machine check because the caller has a
+ * asked us to ignore the event, it has a fixup handler which
+ * will do the appropriate error handling and reporting.
*/
if (evt->error_type == MCE_ERROR_TYPE_UE) {
+ if (evt->u.ue_error.ignore_event) {
+ __this_cpu_dec(mce_ue_count);
+ continue;
+ }
+
if (evt->u.ue_error.physical_address_provided) {
unsigned long pfn;
@@ -301,15 +311,25 @@
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
evt = this_cpu_ptr(&mce_event_queue[index]);
- machine_check_print_event_info(evt, false);
+
+ if (evt->error_type == MCE_ERROR_TYPE_UE &&
+ evt->u.ue_error.ignore_event) {
+ __this_cpu_dec(mce_queue_count);
+ continue;
+ }
+ machine_check_print_event_info(evt, false, false);
__this_cpu_dec(mce_queue_count);
}
}
void machine_check_print_event_info(struct machine_check_event *evt,
- bool user_mode)
+ bool user_mode, bool in_guest)
{
- const char *level, *sevstr, *subtype;
+ const char *level, *sevstr, *subtype, *err_type, *initiator;
+ uint64_t ea = 0, pa = 0;
+ int n = 0;
+ char dar_str[50];
+ char pa_str[50];
static const char *mc_ue_types[] = {
"Indeterminate",
"Instruction fetch",
@@ -356,6 +376,13 @@
"Store (timeout)",
"Page table walk Load/Store (timeout)",
};
+ static const char *mc_error_class[] = {
+ "Unknown",
+ "Hardware error",
+ "Probable Hardware error (some chance of software cause)",
+ "Software error",
+ "Probable Software error (some chance of hardware cause)",
+ };
/* Print things out */
if (evt->version != MCE_V1) {
@@ -370,9 +397,9 @@
break;
case MCE_SEV_WARNING:
level = KERN_WARNING;
- sevstr = "";
+ sevstr = "Warning";
break;
- case MCE_SEV_ERROR_SYNC:
+ case MCE_SEV_SEVERE:
level = KERN_ERR;
sevstr = "Severe";
break;
@@ -383,99 +410,145 @@
break;
}
- printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
- evt->disposition == MCE_DISPOSITION_RECOVERED ?
- "Recovered" : "Not recovered");
-
- if (user_mode) {
- printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
- evt->srr0, current->pid, current->comm);
- } else {
- printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
- (void *)evt->srr0);
+ switch(evt->initiator) {
+ case MCE_INITIATOR_CPU:
+ initiator = "CPU";
+ break;
+ case MCE_INITIATOR_PCI:
+ initiator = "PCI";
+ break;
+ case MCE_INITIATOR_ISA:
+ initiator = "ISA";
+ break;
+ case MCE_INITIATOR_MEMORY:
+ initiator = "Memory";
+ break;
+ case MCE_INITIATOR_POWERMGM:
+ initiator = "Power Management";
+ break;
+ case MCE_INITIATOR_UNKNOWN:
+ default:
+ initiator = "Unknown";
+ break;
}
- printk("%s Initiator: %s\n", level,
- evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
switch (evt->error_type) {
case MCE_ERROR_TYPE_UE:
+ err_type = "UE";
subtype = evt->u.ue_error.ue_error_type <
ARRAY_SIZE(mc_ue_types) ?
mc_ue_types[evt->u.ue_error.ue_error_type]
: "Unknown";
- printk("%s Error type: UE [%s]\n", level, subtype);
if (evt->u.ue_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.ue_error.effective_address);
+ ea = evt->u.ue_error.effective_address;
if (evt->u.ue_error.physical_address_provided)
- printk("%s Physical address: %016llx\n",
- level, evt->u.ue_error.physical_address);
+ pa = evt->u.ue_error.physical_address;
break;
case MCE_ERROR_TYPE_SLB:
+ err_type = "SLB";
subtype = evt->u.slb_error.slb_error_type <
ARRAY_SIZE(mc_slb_types) ?
mc_slb_types[evt->u.slb_error.slb_error_type]
: "Unknown";
- printk("%s Error type: SLB [%s]\n", level, subtype);
if (evt->u.slb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.slb_error.effective_address);
+ ea = evt->u.slb_error.effective_address;
break;
case MCE_ERROR_TYPE_ERAT:
+ err_type = "ERAT";
subtype = evt->u.erat_error.erat_error_type <
ARRAY_SIZE(mc_erat_types) ?
mc_erat_types[evt->u.erat_error.erat_error_type]
: "Unknown";
- printk("%s Error type: ERAT [%s]\n", level, subtype);
if (evt->u.erat_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.erat_error.effective_address);
+ ea = evt->u.erat_error.effective_address;
break;
case MCE_ERROR_TYPE_TLB:
+ err_type = "TLB";
subtype = evt->u.tlb_error.tlb_error_type <
ARRAY_SIZE(mc_tlb_types) ?
mc_tlb_types[evt->u.tlb_error.tlb_error_type]
: "Unknown";
- printk("%s Error type: TLB [%s]\n", level, subtype);
if (evt->u.tlb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.tlb_error.effective_address);
+ ea = evt->u.tlb_error.effective_address;
break;
case MCE_ERROR_TYPE_USER:
+ err_type = "User";
subtype = evt->u.user_error.user_error_type <
ARRAY_SIZE(mc_user_types) ?
mc_user_types[evt->u.user_error.user_error_type]
: "Unknown";
- printk("%s Error type: User [%s]\n", level, subtype);
if (evt->u.user_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.user_error.effective_address);
+ ea = evt->u.user_error.effective_address;
break;
case MCE_ERROR_TYPE_RA:
+ err_type = "Real address";
subtype = evt->u.ra_error.ra_error_type <
ARRAY_SIZE(mc_ra_types) ?
mc_ra_types[evt->u.ra_error.ra_error_type]
: "Unknown";
- printk("%s Error type: Real address [%s]\n", level, subtype);
if (evt->u.ra_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.ra_error.effective_address);
+ ea = evt->u.ra_error.effective_address;
break;
case MCE_ERROR_TYPE_LINK:
+ err_type = "Link";
subtype = evt->u.link_error.link_error_type <
ARRAY_SIZE(mc_link_types) ?
mc_link_types[evt->u.link_error.link_error_type]
: "Unknown";
- printk("%s Error type: Link [%s]\n", level, subtype);
if (evt->u.link_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.link_error.effective_address);
+ ea = evt->u.link_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_DCACHE:
+ err_type = "D-Cache";
+ subtype = "Unknown";
+ break;
+ case MCE_ERROR_TYPE_ICACHE:
+ err_type = "I-Cache";
+ subtype = "Unknown";
break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
- printk("%s Error type: Unknown\n", level);
+ err_type = "Unknown";
+ subtype = "";
break;
}
+
+ dar_str[0] = pa_str[0] = '\0';
+ if (ea && evt->srr0 != ea) {
+ /* Load/Store address */
+ n = sprintf(dar_str, "DAR: %016llx ", ea);
+ if (pa)
+ sprintf(dar_str + n, "paddr: %016llx ", pa);
+ } else if (pa) {
+ sprintf(pa_str, " paddr: %016llx", pa);
+ }
+
+ printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
+ level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
+ err_type, subtype, dar_str,
+ evt->disposition == MCE_DISPOSITION_RECOVERED ?
+ "Recovered" : "Not recovered");
+
+ if (in_guest || user_mode) {
+ printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
+ level, evt->cpu, current->pid, current->comm,
+ in_guest ? "Guest " : "", evt->srr0, pa_str);
+ } else {
+ printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
+ level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
+ }
+
+ printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
+
+ subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
+ mc_error_class[evt->error_class] : "Unknown";
+ printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Display faulty slb contents for SLB errors. */
+ if (evt->error_type == MCE_ERROR_TYPE_SLB)
+ slb_dump_contents(local_paca->mce_faulty_slbs);
+#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
@@ -488,10 +561,13 @@
{
long handled = 0;
- __this_cpu_inc(irq_stat.mce_exceptions);
+ hv_nmi_check_nonrecoverable(regs);
- if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
- handled = cur_cpu_spec->machine_check_early(regs);
+ /*
+ * See if platform is capable of handling machine check.
+ */
+ if (ppc_md.machine_check_early)
+ handled = ppc_md.machine_check_early(regs);
return handled;
}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 3022d67..1cbf7f1 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Machine check exception handling CPU-side for power7 and power8
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright 2013 IBM Corporation
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
@@ -24,6 +11,7 @@
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/extable.h>
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>
@@ -31,15 +19,17 @@
#include <asm/pte-walk.h>
#include <asm/sstep.h>
#include <asm/exception-64s.h>
+#include <asm/extable.h>
/*
* Convert an address related to an mm to a PFN. NOTE: we are in real
* mode, we could potentially race with page table updates.
*/
-static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
{
pte_t *ptep;
- unsigned long flags;
+ unsigned int shift;
+ unsigned long pfn, flags;
struct mm_struct *mm;
if (user_mode(regs))
@@ -48,19 +38,28 @@
mm = &init_mm;
local_irq_save(flags);
- if (mm == current->mm)
- ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
- else
- ptep = find_init_mm_pte(addr, NULL);
+ ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
+
+ if (!ptep || pte_special(*ptep)) {
+ pfn = ULONG_MAX;
+ goto out;
+ }
+
+ if (shift <= PAGE_SHIFT)
+ pfn = pte_pfn(*ptep);
+ else {
+ unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+ pfn = pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask)));
+ }
+
+out:
local_irq_restore(flags);
- if (!ptep || pte_special(*ptep))
- return ULONG_MAX;
- return pte_pfn(*ptep);
+ return pfn;
}
/* flush SLBs and reload */
#ifdef CONFIG_PPC_BOOK3S_64
-static void flush_and_reload_slb(void)
+void flush_and_reload_slb(void)
{
/* Invalidate all SLBs */
slb_flush_all_realmode();
@@ -95,8 +94,7 @@
return;
}
#endif
- /* PPC_INVALIDATE_ERAT can only be used on ISA v3 and newer */
- asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
}
#define MCE_FLUSH_SLB 1
@@ -131,215 +129,234 @@
bool nip_valid; /* nip is a valid indicator of faulting address */
unsigned int error_type;
unsigned int error_subtype;
+ unsigned int error_class;
unsigned int initiator;
unsigned int severity;
+ bool sync_error;
};
static const struct mce_ierror_table mce_p7_ierror_table[] = {
{ 0x00000000001c0000, 0x0000000000040000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000001c0000, 0x0000000000080000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000001c0000, 0x00000000000c0000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000001c0000, 0x0000000000100000, true,
MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000001c0000, 0x0000000000140000, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000001c0000, 0x0000000000180000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000001c0000, 0x00000000001c0000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, 0, 0, 0, 0, 0 } };
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
static const struct mce_ierror_table mce_p8_ierror_table[] = {
{ 0x00000000081c0000, 0x0000000000040000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000000080000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000000c0000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000100000, true,
- MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000140000, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000180000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000001c0000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008000000, true,
- MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008040000, true,
MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, 0, 0, 0, 0, 0 } };
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
static const struct mce_ierror_table mce_p9_ierror_table[] = {
{ 0x00000000081c0000, 0x0000000000040000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000000080000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000000c0000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000100000, true,
- MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000140000, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000180000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000001c0000, true,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008000000, true,
- MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008040000, true,
MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000080c0000, true,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008100000, true,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008140000, false,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
{ 0x00000000081c0000, 0x0000000008180000, false,
MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
-{ 0x00000000081c0000, 0x00000000081c0000, true,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE,
MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, 0, 0, 0, 0, 0 } };
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
struct mce_derror_table {
unsigned long dsisr_value;
bool dar_valid; /* dar is a valid indicator of faulting address */
unsigned int error_type;
unsigned int error_subtype;
+ unsigned int error_class;
unsigned int initiator;
unsigned int severity;
+ bool sync_error;
};
static const struct mce_derror_table mce_p7_derror_table[] = {
{ 0x00008000, false,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00004000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000800, true,
- MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000400, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000080, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000100, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000040, true,
MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, false, 0, 0, 0, 0 } };
+ MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
static const struct mce_derror_table mce_p8_derror_table[] = {
{ 0x00008000, false,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00004000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00002000, true,
- MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00001000, true,
MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000800, true,
- MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000400, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000200, true,
MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000080, true,
MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000100, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, false, 0, 0, 0, 0 } };
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
static const struct mce_derror_table mce_p9_derror_table[] = {
{ 0x00008000, false,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00004000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00002000, true,
- MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00001000, true,
MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000800, true,
- MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000400, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000200, false,
- MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000080, true,
MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000100, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000040, true,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000020, false,
MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000010, false,
MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000008, false,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, false, 0, 0, 0, 0 } };
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
-static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
+static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
uint64_t *phys_addr)
{
/*
@@ -392,6 +409,8 @@
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
handled = mce_flush(MCE_FLUSH_SLB);
break;
case MCE_ERROR_TYPE_ERAT:
@@ -404,6 +423,7 @@
/* now fill in mce_error_info */
mce_err->error_type = table[i].error_type;
+ mce_err->error_class = table[i].error_class;
switch (table[i].error_type) {
case MCE_ERROR_TYPE_UE:
mce_err->u.ue_error_type = table[i].error_subtype;
@@ -427,11 +447,12 @@
mce_err->u.link_error_type = table[i].error_subtype;
break;
}
+ mce_err->sync_error = table[i].sync_error;
mce_err->severity = table[i].severity;
mce_err->initiator = table[i].initiator;
if (table[i].nip_valid) {
*addr = regs->nip;
- if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ if (mce_err->sync_error &&
table[i].error_type == MCE_ERROR_TYPE_UE) {
unsigned long pfn;
@@ -448,8 +469,10 @@
}
mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
- mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->error_class = MCE_ECLASS_UNKNOWN;
+ mce_err->severity = MCE_SEV_SEVERE;
mce_err->initiator = MCE_INITIATOR_CPU;
+ mce_err->sync_error = true;
return 0;
}
@@ -473,6 +496,8 @@
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
if (mce_flush(MCE_FLUSH_SLB))
handled = 1;
break;
@@ -496,6 +521,7 @@
/* now fill in mce_error_info */
mce_err->error_type = table[i].error_type;
+ mce_err->error_class = table[i].error_class;
switch (table[i].error_type) {
case MCE_ERROR_TYPE_UE:
mce_err->u.ue_error_type = table[i].error_subtype;
@@ -519,18 +545,20 @@
mce_err->u.link_error_type = table[i].error_subtype;
break;
}
+ mce_err->sync_error = table[i].sync_error;
mce_err->severity = table[i].severity;
mce_err->initiator = table[i].initiator;
if (table[i].dar_valid)
*addr = regs->dar;
- else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ else if (mce_err->sync_error &&
table[i].error_type == MCE_ERROR_TYPE_UE) {
/*
* We do a maximum of 4 nested MCE calls, see
* kernel/exception-64s.h
*/
if (get_paca()->in_mce < MAX_MCE_DEPTH)
- mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
+ mce_find_instr_ea_and_phys(regs, addr,
+ phys_addr);
}
found = 1;
}
@@ -539,15 +567,26 @@
return handled;
mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
- mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->error_class = MCE_ECLASS_UNKNOWN;
+ mce_err->severity = MCE_SEV_SEVERE;
mce_err->initiator = MCE_INITIATOR_CPU;
+ mce_err->sync_error = true;
return 0;
}
-static long mce_handle_ue_error(struct pt_regs *regs)
+static long mce_handle_ue_error(struct pt_regs *regs,
+ struct mce_error_info *mce_err)
{
long handled = 0;
+ const struct exception_table_entry *entry;
+
+ entry = search_kernel_exception_table(regs->nip);
+ if (entry) {
+ mce_err->ignore_event = true;
+ regs->nip = extable_fixup(entry);
+ return 1;
+ }
/*
* On specific SCOM read via MMIO we may get a machine check
@@ -580,7 +619,7 @@
&phys_addr);
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
+ handled = mce_handle_ue_error(regs, &mce_err);
save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 0b196cd..974f65f 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains miscellaneous low-level functions.
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -8,11 +9,6 @@
* PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
*
* setjmp/longjmp code by Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/ppc_asm.h>
#include <asm/unistd.h>
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 695b24a..82df4b0 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains miscellaneous low-level functions.
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -10,12 +11,6 @@
* GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
* PPC44x port. Copyright (C) 2011, IBM Corporation
* Author: Suzuki Poulose <suzuki@in.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/sys.h>
@@ -46,11 +41,10 @@
mflr r0
stw r0,4(r1)
lwz r10,THREAD+KSP_LIMIT(r2)
- addi r11,r3,THREAD_INFO_GAP
+ stw r3, THREAD+KSP_LIMIT(r2)
stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
mr r1,r3
stw r10,8(r1)
- stw r11,THREAD+KSP_LIMIT(r2)
bl __do_softirq
lwz r10,8(r1)
lwz r1,0(r1)
@@ -60,17 +54,16 @@
blr
/*
- * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
+ * void call_do_irq(struct pt_regs *regs, void *sp);
*/
_GLOBAL(call_do_irq)
mflr r0
stw r0,4(r1)
lwz r10,THREAD+KSP_LIMIT(r2)
- addi r11,r4,THREAD_INFO_GAP
+ stw r4, THREAD+KSP_LIMIT(r2)
stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
mr r1,r4
stw r10,8(r1)
- stw r11,THREAD+KSP_LIMIT(r2)
bl __do_irq
lwz r10,8(r1)
lwz r1,0(r1)
@@ -153,7 +146,7 @@
mtctr r5
bctr
-#if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_6xx)
+#if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_PPC_BOOK3S_32)
/* This gets called by via-pmu.c to switch the PLL selection
* on 750fx CPU. This function should really be moved to some
@@ -183,10 +176,13 @@
or r4,r4,r5
mtspr SPRN_HID1,r4
+#ifdef CONFIG_SMP
/* Store new HID1 image */
- CURRENT_THREAD_INFO(r6, r1)
- lwz r6,TI_CPU(r6)
+ lwz r6,TASK_CPU(r2)
slwi r6,r6,2
+#else
+ li r6, 0
+#endif
addis r6,r6,nap_save_hid1@ha
stw r4,nap_save_hid1@l(r6)
@@ -223,7 +219,7 @@
mtmsr r7
blr
-#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_6xx */
+#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */
/*
* complement mask on the msr then "or" some values on.
@@ -296,22 +292,20 @@
iccci 0,r3
#endif
#elif defined(CONFIG_FSL_BOOKE)
-BEGIN_FTR_SECTION
+#ifdef CONFIG_E200
mfspr r3,SPRN_L1CSR0
ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
/* msync; isync recommended here */
mtspr SPRN_L1CSR0,r3
isync
blr
-END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
+#endif
mfspr r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr SPRN_L1CSR1,r3
+#elif defined(CONFIG_PPC_BOOK3S_601)
+ blr /* for 601, do nothing */
#else
- mfspr r3,SPRN_PVR
- rlwinm r3,r3,16,16,31
- cmpwi 0,r3,1
- beqlr /* for 601, do nothing */
/* 603/604 processor - use invalidate-all bit in HID0 */
mfspr r3,SPRN_HID0
ori r3,r3,HID0_ICFI
@@ -330,10 +324,10 @@
* flush_icache_range(unsigned long start, unsigned long stop)
*/
_GLOBAL(flush_icache_range)
-BEGIN_FTR_SECTION
+#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
PURGE_PREFETCHED_INS
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ blr /* for 601 and e200, do nothing */
+#else
rlwinm r3,r3,0,0,31 - L1_CACHE_SHIFT
subf r4,r3,r4
addi r4,r4,L1_CACHE_BYTES - 1
@@ -359,6 +353,7 @@
sync /* additional sync needed on g4 */
isync
blr
+#endif
_ASM_NOKPROBE_SYMBOL(flush_icache_range)
EXPORT_SYMBOL(flush_icache_range)
@@ -366,15 +361,15 @@
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
* snoop from the data cache.
- * This is a no-op on the 601 which has a unified cache.
+ * This is a no-op on the 601 and e200 which have a unified cache.
*
* void __flush_dcache_icache(void *page)
*/
_GLOBAL(__flush_dcache_icache)
-BEGIN_FTR_SECTION
+#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
PURGE_PREFETCHED_INS
blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+#else
rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
mtctr r4
@@ -402,6 +397,7 @@
sync
isync
blr
+#endif
#ifndef CONFIG_BOOKE
/*
@@ -413,10 +409,10 @@
* void __flush_dcache_icache_phys(unsigned long physaddr)
*/
_GLOBAL(__flush_dcache_icache_phys)
-BEGIN_FTR_SECTION
+#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
PURGE_PREFETCHED_INS
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ blr /* for 601 and e200, do nothing */
+#else
mfmsr r10
rlwinm r0,r10,0,28,26 /* clear DR */
mtmsr r0
@@ -437,6 +433,7 @@
mtmsr r10 /* restore DR */
isync
blr
+#endif
#endif /* CONFIG_BOOKE */
/*
@@ -456,7 +453,12 @@
stwu r9,16(r3)
_GLOBAL(copy_page)
+ rlwinm r5, r3, 0, L1_CACHE_BYTES - 1
addi r3,r3,-4
+
+0: twnei r5, 0 /* WARN if r3 is not cache aligned */
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+
addi r4,r4,-4
li r5,4
@@ -599,7 +601,7 @@
#ifdef CONFIG_SMP
_GLOBAL(start_secondary_resume)
/* Reset stack */
- CURRENT_THREAD_INFO(r1, r1)
+ rlwinm r1, r1, 0, 0, 31 - THREAD_SHIFT
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
li r3,0
stw r3,0(r1) /* Zero the stack frame pointer */
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 262ba94..b55a7b4 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains miscellaneous low-level functions.
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -6,12 +7,6 @@
* and Paul Mackerras.
* Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
* PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/sys.h>
@@ -115,58 +110,6 @@
EXPORT_SYMBOL(flush_icache_range)
/*
- * Like above, but only do the D-cache.
- *
- * flush_dcache_range(unsigned long start, unsigned long stop)
- *
- * flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL_TOC(flush_dcache_range)
-
-/*
- * Flush the data cache to memory
- *
- * Different systems have different cache line sizes
- */
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mtctr r8
-0: dcbst 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- blr
-EXPORT_SYMBOL(flush_dcache_range)
-
-_GLOBAL(flush_inval_dcache_range)
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- sync
- isync
- mtctr r8
-0: dcbf 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- isync
- blr
-
-
-/*
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
* snoop from the data cache.
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 2d861a3..df649ac 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Kernel module help for powerpc.
Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
Copyright (C) 2008 Freescale Semiconductor, Inc.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/elf.h>
#include <linux/moduleloader.h>
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 88d8377..d7134c6 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Kernel module help for PPC.
Copyright (C) 2001 Rusty Russell.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -172,10 +160,12 @@
static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
{
- if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16)
- && entry->jump[1] == 0x398c0000 + (val & 0xffff))
- return 1;
- return 0;
+ if (entry->jump[0] != (PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val)))
+ return 0;
+ if (entry->jump[1] != (PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) |
+ PPC_LO(val)))
+ return 0;
+ return 1;
}
/* Set up a trampoline in the PLT to bounce us to the distant function */
@@ -200,10 +190,16 @@
entry++;
}
- entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */
- entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/
- entry->jump[2] = 0x7d8903a6; /* mtctr r12 */
- entry->jump[3] = 0x4e800420; /* bctr */
+ /*
+ * lis r12, sym@ha
+ * addi r12, r12, sym@l
+ * mtctr r12
+ * bctr
+ */
+ entry->jump[0] = PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val);
+ entry->jump[1] = PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | PPC_LO(val);
+ entry->jump[2] = PPC_INST_MTCTR | __PPC_RS(R12);
+ entry->jump[3] = PPC_INST_BCTR;
pr_debug("Initialized plt for 0x%x at %p\n", val, entry);
return (uint32_t)entry;
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 8661eea..007606a 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Kernel module help for PPC64.
Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -133,20 +121,27 @@
* the stub, but it's significantly shorter to put these values at the
* end of the stub code, and patch the stub address (32-bits relative
* to the TOC ptr, r2) into the stub.
+ *
+ * addis r11,r2, <high>
+ * addi r11,r11, <low>
+ * std r2,R2_STACK_OFFSET(r1)
+ * ld r12,32(r11)
+ * ld r2,40(r11)
+ * mtctr r12
+ * bctr
*/
-
static u32 ppc64_stub_insns[] = {
- 0x3d620000, /* addis r11,r2, <high> */
- 0x396b0000, /* addi r11,r11, <low> */
+ PPC_INST_ADDIS | __PPC_RT(R11) | __PPC_RA(R2),
+ PPC_INST_ADDI | __PPC_RT(R11) | __PPC_RA(R11),
/* Save current r2 value in magic place on the stack. */
- 0xf8410000|R2_STACK_OFFSET, /* std r2,R2_STACK_OFFSET(r1) */
- 0xe98b0020, /* ld r12,32(r11) */
+ PPC_INST_STD | __PPC_RS(R2) | __PPC_RA(R1) | R2_STACK_OFFSET,
+ PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R11) | 32,
#ifdef PPC64_ELF_ABI_v1
/* Set up new r2 from function descriptor */
- 0xe84b0028, /* ld r2,40(r11) */
+ PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R11) | 40,
#endif
- 0x7d8903a6, /* mtctr r12 */
- 0x4e800420 /* bctr */
+ PPC_INST_MTCTR | __PPC_RS(R12),
+ PPC_INST_BCTR,
};
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -400,13 +395,6 @@
return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000;
}
-/* Both low and high 16 bits are added as SIGNED additions, so if low
- 16 bits has high bit set, high 16 bits must be adjusted. These
- macros do that (stolen from binutils). */
-#define PPC_LO(v) ((v) & 0xffff)
-#define PPC_HI(v) (((v) >> 16) & 0xffff)
-#define PPC_HA(v) PPC_HI ((v) + 0x8000)
-
/* Patch stub to reference function and correct r2 value. */
static inline int create_stub(const Elf64_Shdr *sechdrs,
struct ppc64_stub_entry *entry,
@@ -711,18 +699,21 @@
* ld r2, ...(r12)
* add r2, r2, r12
*/
- if ((((uint32_t *)location)[0] & ~0xfffc)
- != 0xe84c0000)
+ if ((((uint32_t *)location)[0] & ~0xfffc) !=
+ (PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R12)))
break;
- if (((uint32_t *)location)[1] != 0x7c426214)
+ if (((uint32_t *)location)[1] !=
+ (PPC_INST_ADD | __PPC_RT(R2) | __PPC_RA(R2) | __PPC_RB(R12)))
break;
/*
* If found, replace it with:
* addis r2, r12, (.TOC.-func)@ha
- * addi r2, r12, (.TOC.-func)@l
+ * addi r2, r2, (.TOC.-func)@l
*/
- ((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value);
- ((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value);
+ ((uint32_t *)location)[0] = PPC_INST_ADDIS | __PPC_RT(R2) |
+ __PPC_RA(R12) | PPC_HA(value);
+ ((uint32_t *)location)[1] = PPC_INST_ADDI | __PPC_RT(R2) |
+ __PPC_RA(R2) | PPC_LO(value);
break;
case R_PPC64_REL16_HA:
@@ -776,12 +767,19 @@
{
struct ppc64_stub_entry *entry;
unsigned int i, num_stubs;
+ /*
+ * ld r12,PACATOC(r13)
+ * addis r12,r12,<high>
+ * addi r12,r12,<low>
+ * mtctr r12
+ * bctr
+ */
static u32 stub_insns[] = {
- 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */
- 0x3d8c0000, /* addis r12,r12,<high> */
- 0x398c0000, /* addi r12,r12,<low> */
- 0x7d8903a6, /* mtctr r12 */
- 0x4e800420, /* bctr */
+ PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC,
+ PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12),
+ PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12),
+ PPC_INST_MTCTR | __PPC_RS(R12),
+ PPC_INST_BCTR,
};
long reladdr;
diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
index f219765..a5d25be 100644
--- a/arch/powerpc/kernel/msi.c
+++ b/arch/powerpc/kernel/msi.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2006-2007, Michael Ellerman, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
diff --git a/arch/powerpc/kernel/note.S b/arch/powerpc/kernel/note.S
new file mode 100644
index 0000000..bcdad15
--- /dev/null
+++ b/arch/powerpc/kernel/note.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC ELF notes.
+ *
+ * Copyright 2019, IBM Corporation
+ */
+
+#include <linux/elfnote.h>
+#include <asm/elfnote.h>
+
+/*
+ * Ultravisor-capable bit (PowerNV only).
+ *
+ * Bit 0 indicates that the powerpc kernel binary knows how to run in an
+ * ultravisor-enabled system.
+ *
+ * In an ultravisor-enabled system, some machine resources are now controlled
+ * by the ultravisor. If the kernel is not ultravisor-capable, but it ends up
+ * being run on a machine with ultravisor, the kernel will probably crash
+ * trying to access ultravisor resources. For instance, it may crash in early
+ * boot trying to set the partition table entry 0.
+ *
+ * In an ultravisor-enabled system, a bootloader could warn the user or prevent
+ * the kernel from being run if the PowerPC ultravisor capability doesn't exist
+ * or the Ultravisor-capable bit is not set.
+ */
+#ifdef CONFIG_PPC_POWERNV
+#define PPCCAP_ULTRAVISOR_BIT (1 << 0)
+#else
+#define PPCCAP_ULTRAVISOR_BIT 0
+#endif
+
+/*
+ * Add the PowerPC Capabilities in the binary ELF note. It is a bitmap that
+ * can be used to advertise kernel capabilities to userland.
+ */
+#define PPC_CAPABILITIES_BITMAP (PPCCAP_ULTRAVISOR_BIT)
+
+ELFNOTE(PowerPC, PPC_ELFNOTE_CAPABILITIES,
+ .long PPC_CAPABILITIES_BITMAP)
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 22e9d28..fb4f610 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -1,18 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* c 2001 PPC 64 Team, IBM Corp
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* /dev/nvram driver for PPC64
- *
- * This perhaps should live in drivers/char
- *
- * TODO: Split the /dev/nvram part (that one can use
- * drivers/char/generic_nvram.c) from the arch & partition
- * parsing code.
*/
#include <linux/types.h>
@@ -563,8 +553,6 @@
nvram_pstore_info.buf = oops_data;
nvram_pstore_info.bufsize = oops_data_sz;
- spin_lock_init(&nvram_pstore_info.buf_lock);
-
rc = pstore_register(&nvram_pstore_info);
if (rc && (rc != -EPERM))
/* Print error only when pstore.backend == nvram */
@@ -716,136 +704,6 @@
spin_unlock_irqrestore(&lock, flags);
}
-static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
-{
- if (ppc_md.nvram_size == NULL)
- return -ENODEV;
- return generic_file_llseek_size(file, offset, origin, MAX_LFS_FILESIZE,
- ppc_md.nvram_size());
-}
-
-
-static ssize_t dev_nvram_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- ssize_t ret;
- char *tmp = NULL;
- ssize_t size;
-
- if (!ppc_md.nvram_size) {
- ret = -ENODEV;
- goto out;
- }
-
- size = ppc_md.nvram_size();
- if (size < 0) {
- ret = size;
- goto out;
- }
-
- if (*ppos >= size) {
- ret = 0;
- goto out;
- }
-
- count = min_t(size_t, count, size - *ppos);
- count = min(count, PAGE_SIZE);
-
- tmp = kmalloc(count, GFP_KERNEL);
- if (!tmp) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = ppc_md.nvram_read(tmp, count, ppos);
- if (ret <= 0)
- goto out;
-
- if (copy_to_user(buf, tmp, ret))
- ret = -EFAULT;
-
-out:
- kfree(tmp);
- return ret;
-
-}
-
-static ssize_t dev_nvram_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- ssize_t ret;
- char *tmp = NULL;
- ssize_t size;
-
- ret = -ENODEV;
- if (!ppc_md.nvram_size)
- goto out;
-
- ret = 0;
- size = ppc_md.nvram_size();
- if (*ppos >= size || size < 0)
- goto out;
-
- count = min_t(size_t, count, size - *ppos);
- count = min(count, PAGE_SIZE);
-
- tmp = memdup_user(buf, count);
- if (IS_ERR(tmp)) {
- ret = PTR_ERR(tmp);
- goto out;
- }
-
- ret = ppc_md.nvram_write(tmp, count, ppos);
-
- kfree(tmp);
-out:
- return ret;
-}
-
-static long dev_nvram_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- switch(cmd) {
-#ifdef CONFIG_PPC_PMAC
- case OBSOLETE_PMAC_NVRAM_GET_OFFSET:
- printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n");
- case IOC_NVRAM_GET_OFFSET: {
- int part, offset;
-
- if (!machine_is(powermac))
- return -EINVAL;
- if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0)
- return -EFAULT;
- if (part < pmac_nvram_OF || part > pmac_nvram_NR)
- return -EINVAL;
- offset = pmac_get_partition(part);
- if (offset < 0)
- return offset;
- if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0)
- return -EFAULT;
- return 0;
- }
-#endif /* CONFIG_PPC_PMAC */
- default:
- return -EINVAL;
- }
-}
-
-static const struct file_operations nvram_fops = {
- .owner = THIS_MODULE,
- .llseek = dev_nvram_llseek,
- .read = dev_nvram_read,
- .write = dev_nvram_write,
- .unlocked_ioctl = dev_nvram_ioctl,
-};
-
-static struct miscdevice nvram_dev = {
- NVRAM_MINOR,
- "nvram",
- &nvram_fops
-};
-
-
#ifdef DEBUG_NVRAM
static void __init nvram_print_partitions(char * label)
{
@@ -993,6 +851,8 @@
long size = 0;
int rc;
+ BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
+
/* Convert sizes from bytes to blocks */
req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
@@ -1193,22 +1053,3 @@
kfree(header);
return err;
}
-
-static int __init nvram_init(void)
-{
- int rc;
-
- BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
-
- if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
- return -ENODEV;
-
- rc = misc_register(&nvram_dev);
- if (rc != 0) {
- printk(KERN_ERR "nvram_init: failed to register device\n");
- return rc;
- }
-
- return rc;
-}
-device_initcall(nvram_init);
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index becaec9..427fc22 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp.
* <benh@kernel.crashing.org>
* and Arnd Bergmann, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#undef DEBUG
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 8237884..024f7aa 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Code for Kernel probes Jump optimization.
*
* Copyright 2017, Anju T, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kprobes.h>
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 98a3aee..cf38352 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Code to prepare detour buffer for optprobes in Kernel.
*
* Copyright 2017, Anju T, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/ppc_asm.h>
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 0ee3e6d..949eceb 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -1,22 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* c 2001 PPC 64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/smp.h>
#include <linux/export.h>
#include <linux/memblock.h>
#include <linux/sched/task.h>
+#include <linux/numa.h>
#include <asm/lppaca.h>
#include <asm/paca.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/kexec.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
#include "setup.h"
@@ -27,7 +26,7 @@
static void *__init alloc_paca_data(unsigned long size, unsigned long align,
unsigned long limit, int cpu)
{
- unsigned long pa;
+ void *ptr;
int nid;
/*
@@ -36,27 +35,62 @@
* which will put its paca in the right place.
*/
if (cpu == boot_cpuid) {
- nid = -1;
+ nid = NUMA_NO_NODE;
memblock_set_bottom_up(true);
} else {
nid = early_cpu_to_node(cpu);
}
- pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE);
- if (!pa) {
- pa = memblock_alloc_base(size, align, limit);
- if (!pa)
- panic("cannot allocate paca data");
- }
+ ptr = memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT,
+ limit, nid);
+ if (!ptr)
+ panic("cannot allocate paca data");
if (cpu == boot_cpuid)
memblock_set_bottom_up(false);
- return __va(pa);
+ return ptr;
}
#ifdef CONFIG_PPC_PSERIES
+#define LPPACA_SIZE 0x400
+
+static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align,
+ unsigned long limit, int cpu)
+{
+ size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE);
+ static unsigned long shared_lppaca_size;
+ static void *shared_lppaca;
+ void *ptr;
+
+ if (!shared_lppaca) {
+ memblock_set_bottom_up(true);
+
+ shared_lppaca =
+ memblock_alloc_try_nid(shared_lppaca_total_size,
+ PAGE_SIZE, MEMBLOCK_LOW_LIMIT,
+ limit, NUMA_NO_NODE);
+ if (!shared_lppaca)
+ panic("cannot allocate shared data");
+
+ memblock_set_bottom_up(false);
+ uv_share_page(PHYS_PFN(__pa(shared_lppaca)),
+ shared_lppaca_total_size >> PAGE_SHIFT);
+ }
+
+ ptr = shared_lppaca + shared_lppaca_size;
+ shared_lppaca_size += size;
+
+ /*
+ * This is very early in boot, so no harm done if the kernel crashes at
+ * this point.
+ */
+ BUG_ON(shared_lppaca_size >= shared_lppaca_total_size);
+
+ return ptr;
+}
+
/*
* See asm/lppaca.h for more detail.
*
@@ -70,7 +104,7 @@
*lppaca = (struct lppaca) {
.desc = cpu_to_be32(0xd397d781), /* "LpPa" */
- .size = cpu_to_be16(0x400),
+ .size = cpu_to_be16(LPPACA_SIZE),
.fpregs_in_use = 1,
.slb_count = cpu_to_be16(64),
.vmxregs_in_use = 0,
@@ -80,19 +114,22 @@
static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
{
struct lppaca *lp;
- size_t size = 0x400;
- BUILD_BUG_ON(size < sizeof(struct lppaca));
+ BUILD_BUG_ON(sizeof(struct lppaca) > LPPACA_SIZE);
if (early_cpu_has_feature(CPU_FTR_HVMODE))
return NULL;
- lp = alloc_paca_data(size, 0x400, limit, cpu);
+ if (is_secure_guest())
+ lp = alloc_shared_lppaca(LPPACA_SIZE, 0x400, limit, cpu);
+ else
+ lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu);
+
init_lppaca(lp);
return lp;
}
-#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC_PSERIES */
#ifdef CONFIG_PPC_BOOK3S_64
@@ -118,7 +155,6 @@
}
s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);
- memset(s, 0, sizeof(*s));
s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
s->buffer_length = cpu_to_be32(sizeof(*s));
@@ -198,7 +234,11 @@
paca_nr_cpu_ids = nr_cpu_ids;
paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
- paca_ptrs = __va(memblock_alloc(paca_ptrs_size, 0));
+ paca_ptrs = memblock_alloc_raw(paca_ptrs_size, SMP_CACHE_BYTES);
+ if (!paca_ptrs)
+ panic("Failed to allocate %d bytes for paca pointers\n",
+ paca_ptrs_size);
+
memset(paca_ptrs, 0x88, paca_ptrs_size);
}
@@ -222,7 +262,6 @@
paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES,
limit, cpu);
paca_ptrs[cpu] = paca;
- memset(paca, 0, sizeof(struct paca_struct));
initialise_paca(paca, cpu);
#ifdef CONFIG_PPC_PSERIES
@@ -266,12 +305,12 @@
get_paca()->mm_ctx_id = context->id;
#ifdef CONFIG_PPC_MM_SLICES
- VM_BUG_ON(!mm->context.slb_addr_limit);
- get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
- memcpy(&get_paca()->mm_ctx_low_slices_psize,
- &context->low_slices_psize, sizeof(context->low_slices_psize));
- memcpy(&get_paca()->mm_ctx_high_slices_psize,
- &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
+ VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
+ get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context);
+ memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
+ LOW_SLICE_ARRAY_SZ);
+ memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
+ TASK_SLICE_ARRAY_SZ(context));
#else /* CONFIG_PPC_MM_SLICES */
get_paca()->mm_ctx_user_psize = context->user_psize;
get_paca()->mm_ctx_sllp = context->sllp;
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 88e4f69..1c448cf 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Contains common pci routines for ALL ppc platform
* (based on pci_32.c and pci_64.c)
@@ -9,11 +10,6 @@
* Rework, based on alpha PCI code.
*
* Common pmac/prep/chrp pci routines. -- Cort
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
@@ -32,6 +28,7 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/vgaarb.h>
+#include <linux/numa.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -62,19 +59,13 @@
EXPORT_SYMBOL(isa_mem_base);
-static const struct dma_map_ops *pci_dma_ops = &dma_nommu_ops;
+static const struct dma_map_ops *pci_dma_ops;
void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
{
pci_dma_ops = dma_ops;
}
-const struct dma_map_ops *get_pci_dma_ops(void)
-{
- return pci_dma_ops;
-}
-EXPORT_SYMBOL(get_pci_dma_ops);
-
/*
* This function should run under locking protection, specifically
* hose_spinlock.
@@ -132,7 +123,7 @@
int nid = of_node_to_nid(dev);
if (nid < 0 || !node_online(nid))
- nid = -1;
+ nid = NUMA_NO_NODE;
PHB_SET_NODE(phb, nid);
}
@@ -357,6 +348,17 @@
return NULL;
}
+struct pci_controller *pci_find_controller_for_domain(int domain_nr)
+{
+ struct pci_controller *hose;
+
+ list_for_each_entry(hose, &hose_list, list_node)
+ if (hose->global_number == domain_nr)
+ return hose;
+
+ return NULL;
+}
+
/*
* Reads the interrupt pin to determine if interrupt is use by card.
* If the interrupt is used, then gets the interrupt line from the
@@ -972,7 +974,7 @@
/* Hook up default DMA ops */
set_dma_ops(&dev->dev, pci_dma_ops);
- set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
+ dev->dev.archdata.dma_offset = PCI_DRAM_OFFSET;
/* Additional platform DMA/iommu setup */
phb = pci_bus_to_host(dev->bus);
@@ -1377,10 +1379,6 @@
pr_debug("PCI: Assigning unassigned resources...\n");
pci_assign_unassigned_resources();
}
-
- /* Call machine dependent fixup */
- if (ppc_md.pcibios_fixup)
- ppc_md.pcibios_fixup();
}
/* This is used by the PCI hotplug driver to allocate resource
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index cf47b1a..fc62c4b 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Derived from "arch/powerpc/platforms/pseries/pci_dlpar.c"
*
@@ -7,11 +8,6 @@
* Updates, 2005, John Rose <johnrose@austin.ibm.com>
* Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
* Updates, 2013, Gavin Shan <shangw@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/pci.h>
@@ -59,11 +55,18 @@
void pcibios_release_device(struct pci_dev *dev)
{
struct pci_controller *phb = pci_bus_to_host(dev->bus);
+ struct pci_dn *pdn = pci_get_pdn(dev);
eeh_remove_device(dev);
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
+
+ /* free()ing the pci_dn has been deferred to us, do it now */
+ if (pdn && (pdn->flags & PCI_DN_FLAG_DEAD)) {
+ pci_dbg(dev, "freeing dead pdn\n");
+ kfree(pdn);
+ }
}
/**
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index d63b488..b49e106 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Common pmac/prep/chrp pci routines. -- Cort
*/
@@ -10,14 +11,13 @@
#include <linux/capability.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/syscalls.h>
#include <linux/irq.h>
#include <linux/list.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/export.h>
-#include <linux/syscalls.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -204,7 +204,11 @@
struct property* of_prop;
struct device_node *dn;
- of_prop = memblock_virt_alloc(sizeof(struct property) + 256, 0);
+ of_prop = memblock_alloc(sizeof(struct property) + 256,
+ SMP_CACHE_BYTES);
+ if (!of_prop)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(struct property) + 256);
dn = of_find_node_by_path("/");
if (dn) {
memset(of_prop, -1, sizeof(struct property) + 256);
@@ -259,6 +263,10 @@
/* Call common code to handle resource allocation */
pcibios_resource_survey();
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
/* Call machine dependent post-init code */
if (ppc_md.pcibios_after_init)
ppc_md.pcibios_after_init();
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index dff28f9..f83d1f6 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Port for PPC64 David Engebretsen, IBM Corp.
* Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
*
* Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
* Rework, based on alpha PCI code.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
@@ -58,14 +54,20 @@
pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
/* Scan all of the recorded PCI controllers. */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
pcibios_scan_phb(hose);
- pci_bus_add_devices(hose->bus);
- }
/* Call common code to handle resource allocation */
pcibios_resource_survey();
+ /* Add devices. */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ pci_bus_add_devices(hose->bus);
+
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
return 0;
@@ -159,7 +161,7 @@
/* Establish the mapping */
if (__ioremap_at(phys_page, area->addr, size_page,
- pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL)
+ pgprot_noncached(PAGE_KERNEL)) == NULL)
return -ENOMEM;
/* Fixup hose IO resource */
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index ab147a1..9524009 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* pci_dn.c
*
* Copyright (C) 2001 Todd Inglett, IBM Corporation
*
* PCI manipulation via device_nodes.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/pci.h>
@@ -336,6 +323,7 @@
{
struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
struct device_node *parent;
+ struct pci_dev *pdev;
#ifdef CONFIG_EEH
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -349,12 +337,28 @@
WARN_ON(!list_empty(&pdn->child_list));
list_del(&pdn->list);
+ /* Drop the parent pci_dn's ref to our backing dt node */
parent = of_get_parent(dn);
if (parent)
of_node_put(parent);
- dn->data = NULL;
- kfree(pdn);
+ /*
+ * At this point we *might* still have a pci_dev that was
+ * instantiated from this pci_dn. So defer free()ing it until
+ * the pci_dev's release function is called.
+ */
+ pdev = pci_get_domain_bus_and_slot(pdn->phb->global_number,
+ pdn->busno, pdn->devfn);
+ if (pdev) {
+ /* NB: pdev has a ref to dn */
+ pci_dbg(pdev, "marked pdn (from %pOF) as dead\n", dn);
+ pdn->flags |= PCI_DN_FLAG_DEAD;
+ } else {
+ dn->data = NULL;
+ kfree(pdn);
+ }
+
+ pci_dev_put(pdev);
}
EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 98f0472..f91d7e9 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Helper routines to scan the device tree for PCI devices and busses
*
@@ -8,10 +9,6 @@
* Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
* Rework, based on alpha PCI code.
* Copyright (c) 2009 Secret Lab Technologies Ltd.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
*/
#include <linux/pci.h>
@@ -37,29 +34,75 @@
* pci_parse_of_flags - Parse the flags cell of a device tree PCI address
* @addr0: value of 1st cell of a device tree PCI address.
* @bridge: Set this flag if the address is from a bridge 'ranges' property
+ *
+ * PCI Bus Binding to IEEE Std 1275-1994
+ *
+ * Bit# 33222222 22221111 11111100 00000000
+ * 10987654 32109876 54321098 76543210
+ * phys.hi cell: npt000ss bbbbbbbb dddddfff rrrrrrrr
+ * phys.mid cell: hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh
+ * phys.lo cell: llllllll llllllll llllllll llllllll
+ *
+ * where:
+ * n is 0 if the address is relocatable, 1 otherwise
+ * p is 1 if the addressable region is "prefetchable", 0 otherwise
+ * t is 1 if the address is aliased (for non-relocatable I/O),
+ * below 1 MB (for Memory),or below 64 KB (for relocatable I/O).
+ * ss is the space code, denoting the address space:
+ * 00 denotes Configuration Space
+ * 01 denotes I/O Space
+ * 10 denotes 32-bit-address Memory Space
+ * 11 denotes 64-bit-address Memory Space
+ * bbbbbbbb is the 8-bit Bus Number
+ * ddddd is the 5-bit Device Number
+ * fff is the 3-bit Function Number
+ * rrrrrrrr is the 8-bit Register Number
*/
+#define OF_PCI_ADDR0_SPACE(ss) (((ss)&3)<<24)
+#define OF_PCI_ADDR0_SPACE_CFG OF_PCI_ADDR0_SPACE(0)
+#define OF_PCI_ADDR0_SPACE_IO OF_PCI_ADDR0_SPACE(1)
+#define OF_PCI_ADDR0_SPACE_MMIO32 OF_PCI_ADDR0_SPACE(2)
+#define OF_PCI_ADDR0_SPACE_MMIO64 OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_SPACE_MASK OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_RELOC (1UL<<31)
+#define OF_PCI_ADDR0_PREFETCH (1UL<<30)
+#define OF_PCI_ADDR0_ALIAS (1UL<<29)
+#define OF_PCI_ADDR0_BUS 0x00FF0000UL
+#define OF_PCI_ADDR0_DEV 0x0000F800UL
+#define OF_PCI_ADDR0_FN 0x00000700UL
+#define OF_PCI_ADDR0_BARREG 0x000000FFUL
+
unsigned int pci_parse_of_flags(u32 addr0, int bridge)
{
- unsigned int flags = 0;
+ unsigned int flags = 0, as = addr0 & OF_PCI_ADDR0_SPACE_MASK;
- if (addr0 & 0x02000000) {
+ if (as == OF_PCI_ADDR0_SPACE_MMIO32 || as == OF_PCI_ADDR0_SPACE_MMIO64) {
flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
- flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
- flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
- if (addr0 & 0x40000000)
- flags |= IORESOURCE_PREFETCH
- | PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+ if (as == OF_PCI_ADDR0_SPACE_MMIO64)
+ flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 | IORESOURCE_MEM_64;
+
+ if (addr0 & OF_PCI_ADDR0_ALIAS)
+ flags |= PCI_BASE_ADDRESS_MEM_TYPE_1M;
+
+ if (addr0 & OF_PCI_ADDR0_PREFETCH)
+ flags |= IORESOURCE_PREFETCH |
+ PCI_BASE_ADDRESS_MEM_PREFETCH;
+
/* Note: We don't know whether the ROM has been left enabled
* by the firmware or not. We mark it as disabled (ie, we do
* not set the IORESOURCE_ROM_ENABLE flag) for now rather than
* do a config space read, it will be force-enabled if needed
*/
- if (!bridge && (addr0 & 0xff) == 0x30)
+ if (!bridge && (addr0 & OF_PCI_ADDR0_BARREG) == PCI_ROM_ADDRESS)
flags |= IORESOURCE_READONLY;
- } else if (addr0 & 0x01000000)
+
+ } else if (as == OF_PCI_ADDR0_SPACE_IO)
flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+
if (flags)
flags |= IORESOURCE_SIZEALIGN;
+
return flags;
}
@@ -80,10 +123,16 @@
const __be32 *addrs;
u32 i;
int proplen;
+ bool mark_unset = false;
addrs = of_get_property(node, "assigned-addresses", &proplen);
- if (!addrs)
- return;
+ if (!addrs || !proplen) {
+ addrs = of_get_property(node, "reg", &proplen);
+ if (!addrs || !proplen)
+ return;
+ mark_unset = true;
+ }
+
pr_debug(" parse addresses (%d bytes) @ %p\n", proplen, addrs);
for (; proplen >= 20; proplen -= 20, addrs += 5) {
flags = pci_parse_of_flags(of_read_number(addrs, 1), 0);
@@ -108,6 +157,8 @@
continue;
}
res->flags = flags;
+ if (mark_unset)
+ res->flags |= IORESOURCE_UNSET;
res->name = pci_name(dev);
region.start = base;
region.end = base + size - 1;
@@ -125,16 +176,13 @@
struct pci_bus *bus, int devfn)
{
struct pci_dev *dev;
- const char *type;
dev = pci_alloc_dev(bus);
if (!dev)
return NULL;
- type = of_get_property(node, "device_type", NULL);
- if (type == NULL)
- type = "";
- pr_debug(" create device, devfn: %x, type: %s\n", devfn, type);
+ pr_debug(" create device, devfn: %x, type: %s\n", devfn,
+ of_node_get_device_type(node));
dev->dev.of_node = of_node_get(node);
dev->dev.parent = bus->bridge;
@@ -167,12 +215,12 @@
/* Early fixups, before probing the BARs */
pci_fixup_device(pci_fixup_early, dev);
- if (!strcmp(type, "pci") || !strcmp(type, "pciex")) {
+ if (of_node_is_type(node, "pci") || of_node_is_type(node, "pciex")) {
/* a PCI-PCI bridge */
dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
dev->rom_base_reg = PCI_ROM_ADDRESS1;
set_pcie_hotplug_bridge(dev);
- } else if (!strcmp(type, "cardbus")) {
+ } else if (of_node_is_type(node, "cardbus")) {
dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
} else {
dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c
index 58eaa3d..15414c8 100644
--- a/arch/powerpc/kernel/pmc.c
+++ b/arch/powerpc/kernel/pmc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/kernel/pmc.c
*
@@ -5,11 +6,6 @@
* Includes code formerly from arch/ppc/kernel/perfmon.c:
* Author: Andy Fleming
* Copyright (c) 2004 Freescale Semiconductor, Inc
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
@@ -29,7 +25,7 @@
{
#if defined(CONFIG_FSL_EMB_PERFMON)
mtpmr(PMRN_PMGC0, mfpmr(PMRN_PMGC0) & ~PMGC0_PMIE);
-#elif defined(CONFIG_PPC64) || defined(CONFIG_6xx)
+#elif defined(CONFIG_PPC64) || defined(CONFIG_PPC_BOOK3S_32)
if (cur_cpu_spec->pmc_type == PPC_PMC_IBM)
mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~(MMCR0_PMXE|MMCR0_PMAO));
#else
diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h
index a27c914..2346f8c 100644
--- a/arch/powerpc/kernel/ppc32.h
+++ b/arch/powerpc/kernel/ppc32.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _PPC64_PPC32_H
#define _PPC64_PPC32_H
@@ -7,11 +8,6 @@
/*
* Data types and macros for providing 32b PowerPC support.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
/* These are here to support 32-bit syscalls on a 64-bit kernel. */
diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
index 6d1b42e..f3bd0bb 100644
--- a/arch/powerpc/kernel/ppc_save_regs.S
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -1,11 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 1996 Paul Mackerras.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* NOTE: assert(sizeof(buf) > 23 * sizeof(long))
*/
#include <asm/processor.h>
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index 9bfbd80..be3758d 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/init.h>
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index bb6ac47..639ceae 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Derived from "arch/i386/kernel/process.c"
* Copyright (C) 1995 Linus Torvalds
@@ -7,11 +8,6 @@
*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
@@ -43,6 +39,7 @@
#include <linux/uaccess.h>
#include <linux/elf-randomize.h>
#include <linux/pkeys.h>
+#include <linux/seq_buf.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -65,6 +62,8 @@
#include <asm/livepatch.h>
#include <asm/cpu_has_feature.h>
#include <asm/asm-prototypes.h>
+#include <asm/stacktrace.h>
+#include <asm/hw_breakpoint.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
@@ -102,27 +101,8 @@
}
}
-static inline bool msr_tm_active(unsigned long msr)
-{
- return MSR_TM_ACTIVE(msr);
-}
-
-static bool tm_active_with_fp(struct task_struct *tsk)
-{
- return msr_tm_active(tsk->thread.regs->msr) &&
- (tsk->thread.ckpt_regs.msr & MSR_FP);
-}
-
-static bool tm_active_with_altivec(struct task_struct *tsk)
-{
- return msr_tm_active(tsk->thread.regs->msr) &&
- (tsk->thread.ckpt_regs.msr & MSR_VEC);
-}
#else
-static inline bool msr_tm_active(unsigned long msr) { return false; }
static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
-static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
-static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
bool strict_msr_control;
@@ -137,7 +117,8 @@
}
early_param("ppc_strict_facility_enable", enable_strict_msr_control);
-unsigned long msr_check_and_set(unsigned long bits)
+/* notrace because it's called by restore_math */
+unsigned long notrace msr_check_and_set(unsigned long bits)
{
unsigned long oldmsr = mfmsr();
unsigned long newmsr;
@@ -156,7 +137,8 @@
}
EXPORT_SYMBOL_GPL(msr_check_and_set);
-void __msr_check_and_clear(unsigned long bits)
+/* notrace because it's called by restore_math */
+void notrace __msr_check_and_clear(unsigned long bits)
{
unsigned long oldmsr = mfmsr();
unsigned long newmsr;
@@ -180,7 +162,7 @@
save_fpu(tsk);
msr = tsk->thread.regs->msr;
- msr &= ~MSR_FP;
+ msr &= ~(MSR_FP|MSR_FE0|MSR_FE1);
#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX))
msr &= ~MSR_VSX;
@@ -247,7 +229,8 @@
* giveup as this would save to the 'live' structure not the
* checkpointed structure.
*/
- if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+ if (!MSR_TM_ACTIVE(cpumsr) &&
+ MSR_TM_ACTIVE(current->thread.regs->msr))
return;
__giveup_fpu(current);
}
@@ -256,7 +239,7 @@
static int restore_fp(struct task_struct *tsk)
{
- if (tsk->thread.load_fp || tm_active_with_fp(tsk)) {
+ if (tsk->thread.load_fp) {
load_fp_state(¤t->thread.fp_state);
current->thread.load_fp++;
return 1;
@@ -311,7 +294,8 @@
* giveup as this would save to the 'live' structure not the
* checkpointed structure.
*/
- if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+ if (!MSR_TM_ACTIVE(cpumsr) &&
+ MSR_TM_ACTIVE(current->thread.regs->msr))
return;
__giveup_altivec(current);
}
@@ -337,8 +321,7 @@
static int restore_altivec(struct task_struct *tsk)
{
- if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
- (tsk->thread.load_vec || tm_active_with_altivec(tsk))) {
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
load_vr_state(&tsk->thread.vr_state);
tsk->thread.used_vr = 1;
tsk->thread.load_vec++;
@@ -397,7 +380,8 @@
* giveup as this would save to the 'live' structure not the
* checkpointed structure.
*/
- if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+ if (!MSR_TM_ACTIVE(cpumsr) &&
+ MSR_TM_ACTIVE(current->thread.regs->msr))
return;
__giveup_vsx(current);
}
@@ -499,13 +483,14 @@
if (!tsk->thread.regs)
return;
+ check_if_tm_restore_required(tsk);
+
usermsr = tsk->thread.regs->msr;
if ((usermsr & msr_all_available) == 0)
return;
msr_check_and_set(msr_all_available);
- check_if_tm_restore_required(tsk);
WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
@@ -526,11 +511,21 @@
}
EXPORT_SYMBOL(giveup_all);
-void restore_math(struct pt_regs *regs)
+/*
+ * The exception exit path calls restore_math() with interrupts hard disabled
+ * but the soft irq state not "reconciled". ftrace code that calls
+ * local_irq_save/restore causes warnings.
+ *
+ * Rather than complicate the exit path, just don't trace restore_math. This
+ * could be done by having ftrace entry code check for this un-reconciled
+ * condition where MSR[EE]=0 and PACA_IRQ_HARD_DIS is not set, and
+ * temporarily fix it up for the duration of the ftrace call.
+ */
+void notrace restore_math(struct pt_regs *regs)
{
unsigned long msr;
- if (!msr_tm_active(regs->msr) &&
+ if (!MSR_TM_ACTIVE(regs->msr) &&
!current->thread.load_fp && !loadvec(current->thread))
return;
@@ -591,12 +586,11 @@
if (tsk->thread.regs) {
preempt_disable();
BUG_ON(tsk != current);
- save_all(tsk);
-
#ifdef CONFIG_SPE
if (tsk->thread.regs->msr & MSR_SPE)
tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
#endif
+ save_all(tsk);
preempt_enable();
}
@@ -620,8 +614,6 @@
void do_break (struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
- siginfo_t info;
-
current->thread.trap_nr = TRAP_HWBKPT;
if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
11, SIGSEGV) == NOTIFY_STOP)
@@ -634,12 +626,7 @@
hw_breakpoint_disable();
/* Deliver the signal to userspace */
- clear_siginfo(&info);
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_HWBKPT;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGTRAP, &info, current);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address);
}
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -793,39 +780,11 @@
return __set_dabr(dabr, dabrx);
}
-static inline int set_dawr(struct arch_hw_breakpoint *brk)
-{
- unsigned long dawr, dawrx, mrd;
-
- dawr = brk->address;
-
- dawrx = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)) \
- << (63 - 58); //* read/write bits */
- dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) \
- << (63 - 59); //* translate */
- dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) \
- >> 3; //* PRIM bits */
- /* dawr length is stored in field MDR bits 48:53. Matches range in
- doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
- 0b111111=64DW.
- brk->len is in bytes.
- This aligns up to double word size, shifts and does the bias.
- */
- mrd = ((brk->len + 7) >> 3) - 1;
- dawrx |= (mrd & 0x3f) << (63 - 53);
-
- if (ppc_md.set_dawr)
- return ppc_md.set_dawr(dawr, dawrx);
- mtspr(SPRN_DAWR, dawr);
- mtspr(SPRN_DAWRX, dawrx);
- return 0;
-}
-
void __set_breakpoint(struct arch_hw_breakpoint *brk)
{
memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk));
- if (cpu_has_feature(CPU_FTR_DAWR))
+ if (dawr_enabled())
// Power8 or later
set_dawr(brk);
else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
@@ -839,8 +798,8 @@
/* Check if we have DAWR or DABR hardware */
bool ppc_breakpoint_available(void)
{
- if (cpu_has_feature(CPU_FTR_DAWR))
- return true; /* POWER8 DAWR */
+ if (dawr_enabled())
+ return true; /* POWER8 DAWR or POWER9 forced DAWR */
if (cpu_has_feature(CPU_FTR_ARCH_207S))
return false; /* POWER9 with DAWR disabled */
/* DABR: Everything but POWER8 and POWER9 */
@@ -1160,11 +1119,6 @@
thread_pkey_regs_restore(new_thread, old_thread);
}
-#ifdef CONFIG_PPC_BOOK3S_64
-#define CP_SIZE 128
-static const u8 dummy_copy_buffer[CP_SIZE] __attribute__((aligned(CP_SIZE)));
-#endif
-
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
{
@@ -1240,8 +1194,8 @@
batch->active = 1;
}
- if (current_thread_info()->task->thread.regs) {
- restore_math(current_thread_info()->task->thread.regs);
+ if (current->thread.regs) {
+ restore_math(current->thread.regs);
/*
* The copy-paste buffer can only store into foreign real
@@ -1251,7 +1205,7 @@
* mappings, we must issue a cp_abort to clear any state and
* prevent snooping, corruption or a covert channel.
*/
- if (current_thread_info()->task->thread.used_vas)
+ if (current->thread.used_vas)
asm volatile(PPC_CP_ABORT);
}
#endif /* CONFIG_PPC_BOOK3S_64 */
@@ -1259,17 +1213,16 @@
return last;
}
-static int instructions_to_print = 16;
+#define NR_INSN_TO_PRINT 16
static void show_instructions(struct pt_regs *regs)
{
int i;
- unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 *
- sizeof(int));
+ unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
printk("Instruction dump:");
- for (i = 0; i < instructions_to_print; i++) {
+ for (i = 0; i < NR_INSN_TO_PRINT; i++) {
int instr;
if (!(i % 8))
@@ -1284,7 +1237,7 @@
#endif
if (!__kernel_text_address(pc) ||
- probe_kernel_address((unsigned int __user *)pc, instr)) {
+ probe_kernel_address((const void *)pc, instr)) {
pr_cont("XXXXXXXX ");
} else {
if (regs->nip == pc)
@@ -1302,43 +1255,43 @@
void show_user_instructions(struct pt_regs *regs)
{
unsigned long pc;
- int i;
+ int n = NR_INSN_TO_PRINT;
+ struct seq_buf s;
+ char buf[96]; /* enough for 8 times 9 + 2 chars */
- pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int));
+ pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
/*
* Make sure the NIP points at userspace, not kernel text/data or
* elsewhere.
*/
- if (!__access_ok(pc, instructions_to_print * sizeof(int), USER_DS)) {
+ if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) {
pr_info("%s[%d]: Bad NIP, not dumping instructions.\n",
current->comm, current->pid);
return;
}
- pr_info("%s[%d]: code: ", current->comm, current->pid);
+ seq_buf_init(&s, buf, sizeof(buf));
- for (i = 0; i < instructions_to_print; i++) {
- int instr;
+ while (n) {
+ int i;
- if (!(i % 8) && (i > 0)) {
- pr_cont("\n");
- pr_info("%s[%d]: code: ", current->comm, current->pid);
+ seq_buf_clear(&s);
+
+ for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
+ int instr;
+
+ if (probe_kernel_address((const void *)pc, instr)) {
+ seq_buf_printf(&s, "XXXXXXXX ");
+ continue;
+ }
+ seq_buf_printf(&s, regs->nip == pc ? "<%08x> " : "%08x ", instr);
}
- if (probe_kernel_address((unsigned int __user *)pc, instr)) {
- pr_cont("XXXXXXXX ");
- } else {
- if (regs->nip == pc)
- pr_cont("<%08x> ", instr);
- else
- pr_cont("%08x ", instr);
- }
-
- pc += sizeof(int);
+ if (!seq_buf_has_overflowed(&s))
+ pr_info("%s[%d]: code: %s\n", current->comm,
+ current->pid, s.buffer);
}
-
- pr_cont("\n");
}
struct regbit {
@@ -1492,6 +1445,15 @@
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
}
+#ifdef CONFIG_PPC_BOOK3S_64
+void arch_setup_new_exec(void)
+{
+ if (radix_enabled())
+ return;
+ hash__setup_new_exec();
+}
+#endif
+
int set_thread_uses_vas(void)
{
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1625,8 +1587,9 @@
/*
* Copy architecture-specific thread state
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+ unsigned long kthread_arg, struct task_struct *p,
+ unsigned long tls)
{
struct pt_regs *childregs, *kregs;
extern void ret_from_fork(void);
@@ -1635,7 +1598,7 @@
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
struct thread_info *ti = task_thread_info(p);
- klp_init_thread_info(ti);
+ klp_init_thread_info(p);
/* Copy registers */
sp -= sizeof(struct pt_regs);
@@ -1667,10 +1630,10 @@
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_PPC64
if (!is_32bit_task())
- childregs->gpr[13] = childregs->gpr[6];
+ childregs->gpr[13] = tls;
else
#endif
- childregs->gpr[2] = childregs->gpr[6];
+ childregs->gpr[2] = tls;
}
f = ret_from_fork;
@@ -1692,8 +1655,7 @@
sp -= STACK_FRAME_OVERHEAD;
p->thread.ksp = sp;
#ifdef CONFIG_PPC32
- p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
- _ALIGN_UP(sizeof(struct thread_info), 16);
+ p->thread.ksp_limit = (unsigned long)end_of_stack(p);
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
p->thread.ptrace_bps[0] = NULL;
@@ -1712,7 +1674,7 @@
p->thread.dscr = mfspr(SPRN_DSCR);
}
if (cpu_has_feature(CPU_FTR_HAS_PPR))
- p->thread.ppr = INIT_PPR;
+ childregs->ppr = DEFAULT_PPR;
p->thread.tidr = 0;
#endif
@@ -1720,6 +1682,8 @@
return 0;
}
+void preload_new_slb_context(unsigned long start, unsigned long sp);
+
/*
* Set up a thread for executing a new program
*/
@@ -1727,6 +1691,11 @@
{
#ifdef CONFIG_PPC64
unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (!radix_enabled())
+ preload_new_slb_context(start, sp);
+#endif
#endif
/*
@@ -1817,6 +1786,7 @@
#ifdef CONFIG_VSX
current->thread.used_vsr = 0;
#endif
+ current->thread.load_slb = 0;
current->thread.load_fp = 0;
memset(¤t->thread.fp_state, 0, sizeof(current->thread.fp_state));
current->thread.fp_save_area = NULL;
@@ -1989,21 +1959,14 @@
unsigned long stack_page;
unsigned long cpu = task_cpu(p);
- /*
- * Avoid crashing if the stack has overflowed and corrupted
- * task_cpu(p), which is in the thread_info struct.
- */
- if (cpu < NR_CPUS && cpu_possible(cpu)) {
- stack_page = (unsigned long) hardirq_ctx[cpu];
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
- return 1;
+ stack_page = (unsigned long)hardirq_ctx[cpu];
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
- stack_page = (unsigned long) softirq_ctx[cpu];
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
- return 1;
- }
+ stack_page = (unsigned long)softirq_ctx[cpu];
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
return 0;
}
@@ -2012,8 +1975,10 @@
{
unsigned long stack_page = (unsigned long)task_stack_page(p);
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
+ if (sp < THREAD_SIZE)
+ return 0;
+
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
return 1;
return valid_irq_stack(sp, p, nbytes);
@@ -2021,7 +1986,7 @@
EXPORT_SYMBOL(validate_sp);
-unsigned long get_wchan(struct task_struct *p)
+static unsigned long __get_wchan(struct task_struct *p)
{
unsigned long ip, sp;
int count = 0;
@@ -2047,6 +2012,20 @@
return 0;
}
+unsigned long get_wchan(struct task_struct *p)
+{
+ unsigned long ret;
+
+ if (!try_get_task_stack(p))
+ return 0;
+
+ ret = __get_wchan(p);
+
+ put_task_stack(p);
+
+ return ret;
+}
+
static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
void show_stack(struct task_struct *tsk, unsigned long *stack)
@@ -2055,14 +2034,17 @@
int count = 0;
int firstframe = 1;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- int curr_frame = current->curr_ret_stack;
- extern void return_to_handler(void);
- unsigned long rth = (unsigned long)return_to_handler;
+ unsigned long ret_addr;
+ int ftrace_idx = 0;
#endif
- sp = (unsigned long) stack;
if (tsk == NULL)
tsk = current;
+
+ if (!try_get_task_stack(tsk))
+ return;
+
+ sp = (unsigned long) stack;
if (sp == 0) {
if (tsk == current)
sp = current_stack_pointer();
@@ -2074,7 +2056,7 @@
printk("Call Trace:\n");
do {
if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
- return;
+ break;
stack = (unsigned long *) sp;
newsp = stack[0];
@@ -2082,11 +2064,10 @@
if (!firstframe || ip != lr) {
printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if ((ip == rth) && curr_frame >= 0) {
- pr_cont(" (%pS)",
- (void *)current->ret_stack[curr_frame].ret);
- curr_frame--;
- }
+ ret_addr = ftrace_graph_ret_addr(current,
+ &ftrace_idx, ip, stack);
+ if (ret_addr != ip)
+ pr_cont(" (%pS)", (void *)ret_addr);
#endif
if (firstframe)
pr_cont(" (unreliable)");
@@ -2110,6 +2091,8 @@
sp = newsp;
} while (count++ < kstack_depth_to_print);
+
+ put_task_stack(tsk);
}
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index c4d7078..6620f37 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Procedures for creating, accessing and interpreting the device tree.
*
@@ -6,11 +7,6 @@
*
* Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
* {engebret|bergner}@us.ibm.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
@@ -59,6 +55,7 @@
#include <asm/firmware.h>
#include <asm/dt_cpu_ftrs.h>
#include <asm/drmem.h>
+#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
@@ -124,12 +121,15 @@
size = fdt_totalsize(initial_boot_params);
if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) ||
- overlaps_crashkernel(start, size) ||
- overlaps_initrd(start, size)) {
- p = __va(memblock_alloc(size, PAGE_SIZE));
+ !memblock_is_memory(start + size - 1) ||
+ overlaps_crashkernel(start, size) || overlaps_initrd(start, size)) {
+ p = memblock_alloc_raw(size, PAGE_SIZE);
+ if (!p)
+ panic("Failed to allocate %lu bytes to move device tree\n",
+ size);
memcpy(p, initial_boot_params, size);
initial_boot_params = p;
- DBG("Moved device tree to 0x%p\n", p);
+ DBG("Moved device tree to 0x%px\n", p);
}
DBG("<- move_device_tree\n");
@@ -689,7 +689,7 @@
{
phys_addr_t limit;
- DBG(" -> early_init_devtree(%p)\n", params);
+ DBG(" -> early_init_devtree(%px)\n", params);
/* Too early to BUG_ON(), do it by hand */
if (!early_init_dt_verify(params))
@@ -703,9 +703,12 @@
#ifdef CONFIG_PPC_POWERNV
/* Some machines might need OPAL info for debugging, grab it now. */
of_scan_flat_dt(early_init_dt_scan_opal, NULL);
+
+ /* Scan tree for ultravisor feature */
+ of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
#endif
-#ifdef CONFIG_FA_DUMP
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/* scan tree to see if dump is active during last boot */
of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
#endif
@@ -732,7 +735,7 @@
if (PHYSICAL_START > MEMORY_START)
memblock_reserve(MEMORY_START, 0x8000);
reserve_kdump_trampoline();
-#ifdef CONFIG_FA_DUMP
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/*
* If we fail to reserve memory for firmware-assisted dump then
* fallback to kexec based kdump.
@@ -749,7 +752,7 @@
memblock_allow_resize();
memblock_dump_all();
- DBG("Phys. mem: %llx\n", memblock_phys_mem_size());
+ DBG("Phys. mem: %llx\n", (unsigned long long)memblock_phys_mem_size());
/* We may need to relocate the flat tree, do it now.
* FIXME .. and the initrd too? */
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 9b38a2e..100f1b5 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Procedures for interfacing to Open Firmware.
*
@@ -6,11 +7,6 @@
*
* Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
* {engebret|bergner}@us.ibm.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG_PROM
@@ -43,11 +39,14 @@
#include <asm/btext.h>
#include <asm/sections.h>
#include <asm/machdep.h>
-#include <asm/opal.h>
#include <asm/asm-prototypes.h>
+#include <asm/ultravisor-api.h>
#include <linux/linux_logo.h>
+/* All of prom_init bss lives here */
+#define __prombss __section(.bss.prominit)
+
/*
* Eventually bump that one up
*/
@@ -87,7 +86,7 @@
#define OF_WORKAROUNDS 0
#else
#define OF_WORKAROUNDS of_workarounds
-int of_workarounds;
+static int of_workarounds __prombss;
#endif
#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */
@@ -96,7 +95,7 @@
#define PROM_BUG() do { \
prom_printf("kernel BUG at %s line 0x%x!\n", \
__FILE__, __LINE__); \
- __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \
+ __builtin_trap(); \
} while (0)
#ifdef DEBUG_PROM
@@ -148,29 +147,34 @@
unsigned long size, unsigned long offset);
/* prom structure */
-static struct prom_t __initdata prom;
+static struct prom_t __prombss prom;
-static unsigned long prom_entry __initdata;
+static unsigned long __prombss prom_entry;
-#define PROM_SCRATCH_SIZE 256
+static char __prombss of_stdout_device[256];
+static char __prombss prom_scratch[256];
-static char __initdata of_stdout_device[256];
-static char __initdata prom_scratch[PROM_SCRATCH_SIZE];
+static unsigned long __prombss dt_header_start;
+static unsigned long __prombss dt_struct_start, dt_struct_end;
+static unsigned long __prombss dt_string_start, dt_string_end;
-static unsigned long __initdata dt_header_start;
-static unsigned long __initdata dt_struct_start, dt_struct_end;
-static unsigned long __initdata dt_string_start, dt_string_end;
-
-static unsigned long __initdata prom_initrd_start, prom_initrd_end;
+static unsigned long __prombss prom_initrd_start, prom_initrd_end;
#ifdef CONFIG_PPC64
-static int __initdata prom_iommu_force_on;
-static int __initdata prom_iommu_off;
-static unsigned long __initdata prom_tce_alloc_start;
-static unsigned long __initdata prom_tce_alloc_end;
+static int __prombss prom_iommu_force_on;
+static int __prombss prom_iommu_off;
+static unsigned long __prombss prom_tce_alloc_start;
+static unsigned long __prombss prom_tce_alloc_end;
#endif
-static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+#ifdef CONFIG_PPC_PSERIES
+static bool __prombss prom_radix_disable;
+static bool __prombss prom_xive_disable;
+#endif
+
+#ifdef CONFIG_PPC_SVM
+static bool __prombss prom_svm_enable;
+#endif
struct platform_support {
bool hash_mmu;
@@ -188,26 +192,25 @@
#define PLATFORM_LPAR 0x0001
#define PLATFORM_POWERMAC 0x0400
#define PLATFORM_GENERIC 0x0500
-#define PLATFORM_OPAL 0x0600
-static int __initdata of_platform;
+static int __prombss of_platform;
-static char __initdata prom_cmd_line[COMMAND_LINE_SIZE];
+static char __prombss prom_cmd_line[COMMAND_LINE_SIZE];
-static unsigned long __initdata prom_memory_limit;
+static unsigned long __prombss prom_memory_limit;
-static unsigned long __initdata alloc_top;
-static unsigned long __initdata alloc_top_high;
-static unsigned long __initdata alloc_bottom;
-static unsigned long __initdata rmo_top;
-static unsigned long __initdata ram_top;
+static unsigned long __prombss alloc_top;
+static unsigned long __prombss alloc_top_high;
+static unsigned long __prombss alloc_bottom;
+static unsigned long __prombss rmo_top;
+static unsigned long __prombss ram_top;
-static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE];
-static int __initdata mem_reserve_cnt;
+static struct mem_map_entry __prombss mem_reserve_map[MEM_RESERVE_MAP_SIZE];
+static int __prombss mem_reserve_cnt;
-static cell_t __initdata regbuf[1024];
+static cell_t __prombss regbuf[1024];
-static bool rtas_has_query_cpu_stopped;
+static bool __prombss rtas_has_query_cpu_stopped;
/*
@@ -221,6 +224,135 @@
#define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR)
#define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR)
+/* Copied from lib/string.c and lib/kstrtox.c */
+
+static int __init prom_strcmp(const char *cs, const char *ct)
+{
+ unsigned char c1, c2;
+
+ while (1) {
+ c1 = *cs++;
+ c2 = *ct++;
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (!c1)
+ break;
+ }
+ return 0;
+}
+
+static char __init *prom_strcpy(char *dest, const char *src)
+{
+ char *tmp = dest;
+
+ while ((*dest++ = *src++) != '\0')
+ /* nothing */;
+ return tmp;
+}
+
+static int __init prom_strncmp(const char *cs, const char *ct, size_t count)
+{
+ unsigned char c1, c2;
+
+ while (count) {
+ c1 = *cs++;
+ c2 = *ct++;
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (!c1)
+ break;
+ count--;
+ }
+ return 0;
+}
+
+static size_t __init prom_strlen(const char *s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+static int __init prom_memcmp(const void *cs, const void *ct, size_t count)
+{
+ const unsigned char *su1, *su2;
+ int res = 0;
+
+ for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
+ if ((res = *su1 - *su2) != 0)
+ break;
+ return res;
+}
+
+static char __init *prom_strstr(const char *s1, const char *s2)
+{
+ size_t l1, l2;
+
+ l2 = prom_strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ l1 = prom_strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!prom_memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+
+static size_t __init prom_strlcpy(char *dest, const char *src, size_t size)
+{
+ size_t ret = prom_strlen(src);
+
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ memcpy(dest, src, len);
+ dest[len] = '\0';
+ }
+ return ret;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+static int __init prom_strtobool(const char *s, bool *res)
+{
+ if (!s)
+ return -EINVAL;
+
+ switch (s[0]) {
+ case 'y':
+ case 'Y':
+ case '1':
+ *res = true;
+ return 0;
+ case 'n':
+ case 'N':
+ case '0':
+ *res = false;
+ return 0;
+ case 'o':
+ case 'O':
+ switch (s[1]) {
+ case 'n':
+ case 'N':
+ *res = true;
+ return 0;
+ case 'f':
+ case 'F':
+ *res = false;
+ return 0;
+ default:
+ break;
+ }
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+#endif
/* This is the one and *ONLY* place where we actually call open
* firmware.
@@ -498,14 +630,14 @@
}
}
-static inline int prom_getprop(phandle node, const char *pname,
- void *value, size_t valuelen)
+static inline int __init prom_getprop(phandle node, const char *pname,
+ void *value, size_t valuelen)
{
return call_prom("getprop", 4, 1, node, ADDR(pname),
(u32)(unsigned long) value, (u32) valuelen);
}
-static inline int prom_getproplen(phandle node, const char *pname)
+static inline int __init prom_getproplen(phandle node, const char *pname)
{
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
@@ -522,8 +654,8 @@
static char *tohex(unsigned int x)
{
- static char digits[] = "0123456789abcdef";
- static char result[9];
+ static const char digits[] __initconst = "0123456789abcdef";
+ static char result[9] __prombss;
int i;
result[8] = 0;
@@ -552,7 +684,7 @@
add_string(&p, tohex((u32)(unsigned long) value));
add_string(&p, tohex(valuelen));
add_string(&p, tohex(ADDR(pname)));
- add_string(&p, tohex(strlen(pname)));
+ add_string(&p, tohex(prom_strlen(pname)));
add_string(&p, "property");
*p = 0;
return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);
@@ -628,33 +760,30 @@
const char *opt;
char *p;
- int l __maybe_unused = 0;
+ int l = 0;
prom_cmd_line[0] = 0;
p = prom_cmd_line;
if ((long)prom.chosen > 0)
l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
-#ifdef CONFIG_CMDLINE
- if (l <= 0 || p[0] == '\0') /* dbl check */
- strlcpy(prom_cmd_line,
- CONFIG_CMDLINE, sizeof(prom_cmd_line));
-#endif /* CONFIG_CMDLINE */
+ if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */
+ prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line));
prom_printf("command line: %s\n", prom_cmd_line);
#ifdef CONFIG_PPC64
- opt = strstr(prom_cmd_line, "iommu=");
+ opt = prom_strstr(prom_cmd_line, "iommu=");
if (opt) {
prom_printf("iommu opt is: %s\n", opt);
opt += 6;
while (*opt && *opt == ' ')
opt++;
- if (!strncmp(opt, "off", 3))
+ if (!prom_strncmp(opt, "off", 3))
prom_iommu_off = 1;
- else if (!strncmp(opt, "force", 5))
+ else if (!prom_strncmp(opt, "force", 5))
prom_iommu_force_on = 1;
}
#endif
- opt = strstr(prom_cmd_line, "mem=");
+ opt = prom_strstr(prom_cmd_line, "mem=");
if (opt) {
opt += 4;
prom_memory_limit = prom_memparse(opt, (const char **)&opt);
@@ -664,13 +793,15 @@
#endif
}
- opt = strstr(prom_cmd_line, "disable_radix");
+#ifdef CONFIG_PPC_PSERIES
+ prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+ opt = prom_strstr(prom_cmd_line, "disable_radix");
if (opt) {
opt += 13;
if (*opt && *opt == '=') {
bool val;
- if (kstrtobool(++opt, &val))
+ if (prom_strtobool(++opt, &val))
prom_radix_disable = false;
else
prom_radix_disable = val;
@@ -679,9 +810,27 @@
}
if (prom_radix_disable)
prom_debug("Radix disabled from cmdline\n");
+
+ opt = prom_strstr(prom_cmd_line, "xive=off");
+ if (opt) {
+ prom_xive_disable = true;
+ prom_debug("XIVE disabled from cmdline\n");
+ }
+#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_SVM
+ opt = prom_strstr(prom_cmd_line, "svm=");
+ if (opt) {
+ bool val;
+
+ opt += sizeof("svm=") - 1;
+ if (!prom_strtobool(opt, &val))
+ prom_svm_enable = val;
+ }
+#endif /* CONFIG_PPC_SVM */
}
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+#ifdef CONFIG_PPC_PSERIES
/*
* The architecture vector has an array of PVR mask/value pairs,
* followed by # option vectors - 1, followed by the option vectors.
@@ -782,7 +931,7 @@
struct option_vector6 vec6;
} __packed;
-struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
+static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
.pvrs = {
{
.mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */
@@ -920,9 +1069,11 @@
},
};
+static struct ibm_arch_vec __prombss ibm_architecture_vec ____cacheline_aligned;
+
/* Old method - ELF header with PT_NOTE sections only works on BE */
#ifdef __BIG_ENDIAN__
-static struct fake_elf {
+static const struct fake_elf {
Elf32_Ehdr elfhdr;
Elf32_Phdr phdr[2];
struct chrpnote {
@@ -955,7 +1106,7 @@
u32 ignore_me;
} rpadesc;
} rpanote;
-} fake_elf = {
+} fake_elf __initconst = {
.elfhdr = {
.e_ident = { 0x7f, 'E', 'L', 'F',
ELFCLASS32, ELFDATA2MSB, EV_CURRENT },
@@ -1020,7 +1171,7 @@
type[0] = 0;
prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "cpu"))
+ if (prom_strcmp(type, "cpu"))
continue;
/*
* There is an entry for each smt thread, each entry being
@@ -1084,10 +1235,17 @@
switch (val) {
case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */
prom_debug("XIVE - either mode supported\n");
- support->xive = true;
+ support->xive = !prom_xive_disable;
break;
case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */
prom_debug("XIVE - exploitation mode supported\n");
+ if (prom_xive_disable) {
+ /*
+ * If we __have__ to do XIVE, we're better off ignoring
+ * the command line rather than not booting.
+ */
+ prom_printf("WARNING: Ignoring cmdline option xive=off\n");
+ }
support->xive = true;
break;
case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */
@@ -1129,14 +1287,27 @@
};
int prop_len = prom_getproplen(prom.chosen,
"ibm,arch-vec-5-platform-support");
+
+ /*
+ * First copy the architecture vec template
+ *
+ * use memcpy() instead of *vec = *vec_template so that GCC replaces it
+ * by __memcpy() when KASAN is active
+ */
+ memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template,
+ sizeof(ibm_architecture_vec));
+
if (prop_len > 1) {
int i;
- u8 vec[prop_len];
+ u8 vec[8];
prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n",
prop_len);
+ if (prop_len > sizeof(vec))
+ prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n",
+ prop_len);
prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support",
&vec, sizeof(vec));
- for (i = 0; i < prop_len; i += 2) {
+ for (i = 0; i < sizeof(vec); i += 2) {
prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2
, vec[i]
, vec[i + 1]);
@@ -1225,7 +1396,7 @@
}
#endif /* __BIG_ENDIAN__ */
}
-#endif /* #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+#endif /* CONFIG_PPC_PSERIES */
/*
* Memory allocation strategy... our layout is normally:
@@ -1421,9 +1592,6 @@
static void __init prom_init_mem(void)
{
phandle node;
-#ifdef DEBUG_PROM
- char *path;
-#endif
char type[64];
unsigned int plen;
cell_t *p, *endp;
@@ -1445,9 +1613,6 @@
prom_debug("root_size_cells: %x\n", rsc);
prom_debug("scanning memory:\n");
-#ifdef DEBUG_PROM
- path = prom_scratch;
-#endif
for (node = 0; prom_next_node(&node); ) {
type[0] = 0;
@@ -1460,7 +1625,7 @@
*/
prom_getprop(node, "name", type, sizeof(type));
}
- if (strcmp(type, "memory"))
+ if (prom_strcmp(type, "memory"))
continue;
plen = prom_getprop(node, "reg", regbuf, sizeof(regbuf));
@@ -1472,9 +1637,10 @@
endp = p + (plen / sizeof(cell_t));
#ifdef DEBUG_PROM
- memset(path, 0, PROM_SCRATCH_SIZE);
- call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
- prom_debug(" node %s :\n", path);
+ memset(prom_scratch, 0, sizeof(prom_scratch));
+ call_prom("package-to-path", 3, 1, node, prom_scratch,
+ sizeof(prom_scratch) - 1);
+ prom_debug(" node %s :\n", prom_scratch);
#endif /* DEBUG_PROM */
while ((endp - p) >= (rac + rsc)) {
@@ -1562,87 +1728,42 @@
}
}
-#ifdef CONFIG_PPC_POWERNV
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
-static u64 __initdata prom_opal_base;
-static u64 __initdata prom_opal_entry;
-#endif
-
-/*
- * Allocate room for and instantiate OPAL
- */
-static void __init prom_instantiate_opal(void)
+#ifdef CONFIG_PPC_SVM
+static int prom_rtas_hcall(uint64_t args)
{
- phandle opal_node;
- ihandle opal_inst;
- u64 base, entry;
- u64 size = 0, align = 0x10000;
- __be64 val64;
- u32 rets[2];
+ register uint64_t arg1 asm("r3") = H_RTAS;
+ register uint64_t arg2 asm("r4") = args;
- prom_debug("prom_instantiate_opal: start...\n");
-
- opal_node = call_prom("finddevice", 1, 1, ADDR("/ibm,opal"));
- prom_debug("opal_node: %x\n", opal_node);
- if (!PHANDLE_VALID(opal_node))
- return;
-
- val64 = 0;
- prom_getprop(opal_node, "opal-runtime-size", &val64, sizeof(val64));
- size = be64_to_cpu(val64);
- if (size == 0)
- return;
- val64 = 0;
- prom_getprop(opal_node, "opal-runtime-alignment", &val64,sizeof(val64));
- align = be64_to_cpu(val64);
-
- base = alloc_down(size, align, 0);
- if (base == 0) {
- prom_printf("OPAL allocation failed !\n");
- return;
- }
-
- opal_inst = call_prom("open", 1, 1, ADDR("/ibm,opal"));
- if (!IHANDLE_VALID(opal_inst)) {
- prom_printf("opening opal package failed (%x)\n", opal_inst);
- return;
- }
-
- prom_printf("instantiating opal at 0x%llx...", base);
-
- if (call_prom_ret("call-method", 4, 3, rets,
- ADDR("load-opal-runtime"),
- opal_inst,
- base >> 32, base & 0xffffffff) != 0
- || (rets[0] == 0 && rets[1] == 0)) {
- prom_printf(" failed\n");
- return;
- }
- entry = (((u64)rets[0]) << 32) | rets[1];
-
- prom_printf(" done\n");
-
- reserve_mem(base, size);
-
- prom_debug("opal base = 0x%llx\n", base);
- prom_debug("opal align = 0x%llx\n", align);
- prom_debug("opal entry = 0x%llx\n", entry);
- prom_debug("opal size = 0x%llx\n", size);
-
- prom_setprop(opal_node, "/ibm,opal", "opal-base-address",
- &base, sizeof(base));
- prom_setprop(opal_node, "/ibm,opal", "opal-entry-address",
- &entry, sizeof(entry));
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
- prom_opal_base = base;
- prom_opal_entry = entry;
-#endif
- prom_debug("prom_instantiate_opal: end...\n");
+ asm volatile("sc 1\n" : "=r" (arg1) :
+ "r" (arg1),
+ "r" (arg2) :);
+ return arg1;
}
-#endif /* CONFIG_PPC_POWERNV */
+static struct rtas_args __prombss os_term_args;
+
+static void __init prom_rtas_os_term(char *str)
+{
+ phandle rtas_node;
+ __be32 val;
+ u32 token;
+
+ prom_debug("%s: start...\n", __func__);
+ rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas"));
+ prom_debug("rtas_node: %x\n", rtas_node);
+ if (!PHANDLE_VALID(rtas_node))
+ return;
+
+ val = 0;
+ prom_getprop(rtas_node, "ibm,os-term", &val, sizeof(val));
+ token = be32_to_cpu(val);
+ prom_debug("ibm,os-term: %x\n", token);
+ if (token == 0)
+ prom_panic("Could not get token for ibm,os-term\n");
+ os_term_args.token = cpu_to_be32(token);
+ prom_rtas_hcall((uint64_t)&os_term_args);
+}
+#endif /* CONFIG_PPC_SVM */
/*
* Allocate room for and instantiate RTAS
@@ -1823,19 +1944,19 @@
prom_getprop(node, "device_type", type, sizeof(type));
prom_getprop(node, "model", model, sizeof(model));
- if ((type[0] == 0) || (strstr(type, "pci") == NULL))
+ if ((type[0] == 0) || (prom_strstr(type, "pci") == NULL))
continue;
/* Keep the old logic intact to avoid regression. */
if (compatible[0] != 0) {
- if ((strstr(compatible, "python") == NULL) &&
- (strstr(compatible, "Speedwagon") == NULL) &&
- (strstr(compatible, "Winnipeg") == NULL))
+ if ((prom_strstr(compatible, "python") == NULL) &&
+ (prom_strstr(compatible, "Speedwagon") == NULL) &&
+ (prom_strstr(compatible, "Winnipeg") == NULL))
continue;
} else if (model[0] != 0) {
- if ((strstr(model, "ython") == NULL) &&
- (strstr(model, "peedwagon") == NULL) &&
- (strstr(model, "innipeg") == NULL))
+ if ((prom_strstr(model, "ython") == NULL) &&
+ (prom_strstr(model, "peedwagon") == NULL) &&
+ (prom_strstr(model, "innipeg") == NULL))
continue;
}
@@ -1863,10 +1984,10 @@
local_alloc_bottom = base;
/* It seems OF doesn't null-terminate the path :-( */
- memset(path, 0, PROM_SCRATCH_SIZE);
+ memset(path, 0, sizeof(prom_scratch));
/* Call OF to setup the TCE hardware */
if (call_prom("package-to-path", 3, 1, node,
- path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) {
+ path, sizeof(prom_scratch) - 1) == PROM_ERROR) {
prom_printf("package-to-path failed\n");
}
@@ -1984,12 +2105,12 @@
type[0] = 0;
prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "cpu") != 0)
+ if (prom_strcmp(type, "cpu") != 0)
continue;
/* Skip non-configured cpus. */
if (prom_getprop(node, "status", type, sizeof(type)) > 0)
- if (strcmp(type, "okay") != 0)
+ if (prom_strcmp(type, "okay") != 0)
continue;
reg = cpu_to_be32(-1); /* make sparse happy */
@@ -2065,9 +2186,9 @@
return;
version[sizeof(version) - 1] = 0;
/* XXX might need to add other versions here */
- if (strcmp(version, "Open Firmware, 1.0.5") == 0)
+ if (prom_strcmp(version, "Open Firmware, 1.0.5") == 0)
of_workarounds = OF_WA_CLAIM;
- else if (strncmp(version, "FirmWorks,3.", 12) == 0) {
+ else if (prom_strncmp(version, "FirmWorks,3.", 12) == 0) {
of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL;
call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim");
} else
@@ -2100,7 +2221,7 @@
call_prom("instance-to-path", 3, 1, prom.stdout, path, 255);
prom_printf("OF stdout device is: %s\n", of_stdout_device);
prom_setprop(prom.chosen, "/chosen", "linux,stdout-path",
- path, strlen(path) + 1);
+ path, prom_strlen(path) + 1);
/* instance-to-package fails on PA-Semi */
stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout);
@@ -2110,7 +2231,7 @@
/* If it's a display, note it */
memset(type, 0, sizeof(type));
prom_getprop(stdout_node, "device_type", type, sizeof(type));
- if (strcmp(type, "display") == 0)
+ if (prom_strcmp(type, "display") == 0)
prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0);
}
}
@@ -2131,29 +2252,25 @@
compat[len] = 0;
while (i < len) {
char *p = &compat[i];
- int sl = strlen(p);
+ int sl = prom_strlen(p);
if (sl == 0)
break;
- if (strstr(p, "Power Macintosh") ||
- strstr(p, "MacRISC"))
+ if (prom_strstr(p, "Power Macintosh") ||
+ prom_strstr(p, "MacRISC"))
return PLATFORM_POWERMAC;
#ifdef CONFIG_PPC64
/* We must make sure we don't detect the IBM Cell
* blades as pSeries due to some firmware issues,
* so we do it here.
*/
- if (strstr(p, "IBM,CBEA") ||
- strstr(p, "IBM,CPBW-1.0"))
+ if (prom_strstr(p, "IBM,CBEA") ||
+ prom_strstr(p, "IBM,CPBW-1.0"))
return PLATFORM_GENERIC;
#endif /* CONFIG_PPC64 */
i += sl + 1;
}
}
#ifdef CONFIG_PPC64
- /* Try to detect OPAL */
- if (PHANDLE_VALID(call_prom("finddevice", 1, 1, ADDR("/ibm,opal"))))
- return PLATFORM_OPAL;
-
/* Try to figure out if it's an IBM pSeries or any other
* PAPR compliant platform. We assume it is if :
* - /device_type is "chrp" (please, do NOT use that for future
@@ -2164,7 +2281,7 @@
compat, sizeof(compat)-1);
if (len <= 0)
return PLATFORM_GENERIC;
- if (strcmp(compat, "chrp"))
+ if (prom_strcmp(compat, "chrp"))
return PLATFORM_GENERIC;
/* Default to pSeries. We need to know if we are running LPAR */
@@ -2202,7 +2319,7 @@
ihandle ih;
int i;
- static unsigned char default_colors[] = {
+ static const unsigned char default_colors[] __initconst = {
0x00, 0x00, 0x00,
0x00, 0x00, 0xaa,
0x00, 0xaa, 0x00,
@@ -2226,19 +2343,19 @@
for (node = 0; prom_next_node(&node); ) {
memset(type, 0, sizeof(type));
prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "display") != 0)
+ if (prom_strcmp(type, "display") != 0)
continue;
/* It seems OF doesn't null-terminate the path :-( */
path = prom_scratch;
- memset(path, 0, PROM_SCRATCH_SIZE);
+ memset(path, 0, sizeof(prom_scratch));
/*
* leave some room at the end of the path for appending extra
* arguments
*/
if (call_prom("package-to-path", 3, 1, node, path,
- PROM_SCRATCH_SIZE-10) == PROM_ERROR)
+ sizeof(prom_scratch) - 10) == PROM_ERROR)
continue;
prom_printf("found display : %s, opening... ", path);
@@ -2281,6 +2398,7 @@
prom_printf("W=%d H=%d LB=%d addr=0x%x\n",
width, height, pitch, addr);
btext_setup_display(width, height, 8, pitch, addr);
+ btext_prepare_BAT();
}
#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
}
@@ -2330,9 +2448,9 @@
s = os = (char *)dt_string_start;
s += 4;
while (s < (char *)dt_string_end) {
- if (strcmp(s, str) == 0)
+ if (prom_strcmp(s, str) == 0)
return s - os;
- s += strlen(s) + 1;
+ s += prom_strlen(s) + 1;
}
return 0;
}
@@ -2365,7 +2483,7 @@
}
/* skip "name" */
- if (strcmp(namep, "name") == 0) {
+ if (prom_strcmp(namep, "name") == 0) {
*mem_start = (unsigned long)namep;
prev_name = "name";
continue;
@@ -2377,7 +2495,7 @@
namep = sstart + soff;
} else {
/* Trim off some if we can */
- *mem_start = (unsigned long)namep + strlen(namep) + 1;
+ *mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
dt_string_end = *mem_start;
}
prev_name = namep;
@@ -2398,7 +2516,7 @@
char *namep, *prev_name, *sstart, *p, *ep, *lp, *path;
unsigned long soff;
unsigned char *valp;
- static char pname[MAX_PROPERTY_NAME];
+ static char pname[MAX_PROPERTY_NAME] __prombss;
int l, room, has_phandle = 0;
dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
@@ -2434,8 +2552,8 @@
/* get it again for debugging */
path = prom_scratch;
- memset(path, 0, PROM_SCRATCH_SIZE);
- call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
+ memset(path, 0, sizeof(prom_scratch));
+ call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1);
/* get and store all properties */
prev_name = "";
@@ -2446,7 +2564,7 @@
break;
/* skip "name" */
- if (strcmp(pname, "name") == 0) {
+ if (prom_strcmp(pname, "name") == 0) {
prev_name = "name";
continue;
}
@@ -2477,18 +2595,15 @@
call_prom("getprop", 4, 1, node, pname, valp, l);
*mem_start = _ALIGN(*mem_start, 4);
- if (!strcmp(pname, "phandle"))
+ if (!prom_strcmp(pname, "phandle"))
has_phandle = 1;
}
- /* Add a "linux,phandle" property if no "phandle" property already
- * existed (can happen with OPAL)
- */
+ /* Add a "phandle" property if none already exist */
if (!has_phandle) {
- soff = dt_find_string("linux,phandle");
+ soff = dt_find_string("phandle");
if (soff == 0)
- prom_printf("WARNING: Can't find string index for"
- " <linux-phandle> node %s\n", path);
+ prom_printf("WARNING: Can't find string index for <phandle> node %s\n", path);
else {
dt_push_token(OF_DT_PROP, mem_start, mem_end);
dt_push_token(4, mem_start, mem_end);
@@ -2548,10 +2663,10 @@
dt_string_start = mem_start;
mem_start += 4; /* hole */
- /* Add "linux,phandle" in there, we'll need it */
+ /* Add "phandle" in there, we'll need it */
namep = make_room(&mem_start, &mem_end, 16, 1);
- strcpy(namep, "linux,phandle");
- mem_start = (unsigned long)namep + strlen(namep) + 1;
+ prom_strcpy(namep, "phandle");
+ mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
/* Build string array */
prom_printf("Building dt strings...\n");
@@ -2873,7 +2988,7 @@
rv = prom_getprop(node, "model", prop, sizeof(prop));
if (rv == PROM_ERROR)
return;
- if (strcmp(prop, "EFIKA5K2"))
+ if (prom_strcmp(prop, "EFIKA5K2"))
return;
prom_printf("Applying EFIKA device tree fixups\n");
@@ -2881,13 +2996,13 @@
/* Claiming to be 'chrp' is death */
node = call_prom("finddevice", 1, 1, ADDR("/"));
rv = prom_getprop(node, "device_type", prop, sizeof(prop));
- if (rv != PROM_ERROR && (strcmp(prop, "chrp") == 0))
+ if (rv != PROM_ERROR && (prom_strcmp(prop, "chrp") == 0))
prom_setprop(node, "/", "device_type", "efika", sizeof("efika"));
/* CODEGEN,description is exposed in /proc/cpuinfo so
fix that too */
rv = prom_getprop(node, "CODEGEN,description", prop, sizeof(prop));
- if (rv != PROM_ERROR && (strstr(prop, "CHRP")))
+ if (rv != PROM_ERROR && (prom_strstr(prop, "CHRP")))
prom_setprop(node, "/", "CODEGEN,description",
"Efika 5200B PowerPC System",
sizeof("Efika 5200B PowerPC System"));
@@ -3106,6 +3221,59 @@
#endif
#endif
+#ifdef CONFIG_PPC_SVM
+/*
+ * Perform the Enter Secure Mode ultracall.
+ */
+static int enter_secure_mode(unsigned long kbase, unsigned long fdt)
+{
+ register unsigned long r3 asm("r3") = UV_ESM;
+ register unsigned long r4 asm("r4") = kbase;
+ register unsigned long r5 asm("r5") = fdt;
+
+ asm volatile("sc 2" : "+r"(r3) : "r"(r4), "r"(r5));
+
+ return r3;
+}
+
+/*
+ * Call the Ultravisor to transfer us to secure memory if we have an ESM blob.
+ */
+static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{
+ int ret;
+
+ if (!prom_svm_enable)
+ return;
+
+ /* Switch to secure mode. */
+ prom_printf("Switching to secure mode.\n");
+
+ /*
+ * The ultravisor will do an integrity check of the kernel image but we
+ * relocated it so the check will fail. Restore the original image by
+ * relocating it back to the kernel virtual base address.
+ */
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ relocate(KERNELBASE);
+
+ ret = enter_secure_mode(kbase, fdt);
+
+ /* Relocate the kernel again. */
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ relocate(kbase);
+
+ if (ret != U_SUCCESS) {
+ prom_printf("Returned %d from switching to secure mode.\n", ret);
+ prom_rtas_os_term("Switch to secure mode failed.\n");
+ }
+}
+#else
+static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{
+}
+#endif /* CONFIG_PPC_SVM */
+
/*
* We enter here early on, when the Open Firmware prom is still
* handling exceptions and the MMU hash table for us.
@@ -3172,7 +3340,7 @@
*/
early_cmdline_parse();
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+#ifdef CONFIG_PPC_PSERIES
/*
* On pSeries, inform the firmware about our capabilities
*/
@@ -3216,15 +3384,9 @@
* On non-powermacs, try to instantiate RTAS. PowerMacs don't
* have a usable RTAS implementation.
*/
- if (of_platform != PLATFORM_POWERMAC &&
- of_platform != PLATFORM_OPAL)
+ if (of_platform != PLATFORM_POWERMAC)
prom_instantiate_rtas();
-#ifdef CONFIG_PPC_POWERNV
- if (of_platform == PLATFORM_OPAL)
- prom_instantiate_opal();
-#endif /* CONFIG_PPC_POWERNV */
-
#ifdef CONFIG_PPC64
/* instantiate sml */
prom_instantiate_sml();
@@ -3237,8 +3399,7 @@
*
* (This must be done after instanciating RTAS)
*/
- if (of_platform != PLATFORM_POWERMAC &&
- of_platform != PLATFORM_OPAL)
+ if (of_platform != PLATFORM_POWERMAC)
prom_hold_cpus();
/*
@@ -3282,11 +3443,9 @@
/*
* in case stdin is USB and still active on IBM machines...
* Unfortunately quiesce crashes on some powermacs if we have
- * closed stdin already (in particular the powerbook 101). It
- * appears that the OPAL version of OFW doesn't like it either.
+ * closed stdin already (in particular the powerbook 101).
*/
- if (of_platform != PLATFORM_POWERMAC &&
- of_platform != PLATFORM_OPAL)
+ if (of_platform != PLATFORM_POWERMAC)
prom_close_stdin();
/*
@@ -3304,10 +3463,8 @@
hdr = dt_header_start;
/* Don't print anything after quiesce under OPAL, it crashes OFW */
- if (of_platform != PLATFORM_OPAL) {
- prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
- prom_debug("->dt_header_start=0x%lx\n", hdr);
- }
+ prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
+ prom_debug("->dt_header_start=0x%lx\n", hdr);
#ifdef CONFIG_PPC32
reloc_got2(-offset);
@@ -3315,13 +3472,10 @@
unreloc_toc();
#endif
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
- /* OPAL early debug gets the OPAL base & entry in r8 and r9 */
- __start(hdr, kbase, 0, 0, 0,
- prom_opal_base, prom_opal_entry);
-#else
+ /* Move to secure memory if we're supposed to be secure guests. */
+ setup_secure_guest(kbase, hdr);
+
__start(hdr, kbase, 0, 0, 0, 0, 0);
-#endif
return 0;
}
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index acb6b92..b183ab9 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -1,11 +1,8 @@
#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
#
# Copyright © 2008 IBM Corporation
#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version
-# 2 of the License, or (at your option) any later version.
# This script checks prom_init.o to see what external symbols it
# is using, if it finds symbols not in the whitelist it returns
@@ -16,18 +13,39 @@
# If you really need to reference something from prom_init.o add
# it to the list below:
+grep "^CONFIG_KASAN=y$" .config >/dev/null
+if [ $? -eq 0 ]
+then
+ MEM_FUNCS="__memcpy __memset"
+else
+ MEM_FUNCS="memcpy memset"
+fi
+
WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
-_end enter_prom memcpy memset reloc_offset __secondary_hold
+_end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
-strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224
+logo_linux_clut224 btext_prepare_BAT
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
-__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
+__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC.
+relocate"
NM="$1"
OBJ="$2"
ERROR=0
+check_section()
+{
+ file=$1
+ section=$2
+ size=$(objdump -h -j $section $file 2>/dev/null | awk "\$2 == \"$section\" {print \$3}")
+ size=${size:-0}
+ if [ $size -ne 0 ]; then
+ ERROR=1
+ echo "Error: Section $section not empty in prom_init.c" >&2
+ fi
+}
+
for UNDEF in $($NM -u $OBJ | awk '{print $2}')
do
# On 64-bit nm gives us the function descriptors, which have
@@ -66,4 +84,8 @@
fi
done
+check_section $OBJ .data
+check_section $OBJ .bss
+check_section $OBJ .init.data
+
exit $ERROR
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 9667666..8c92feb 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -33,6 +33,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <linux/context_tracking.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <linux/pkeys.h>
@@ -42,6 +43,7 @@
#include <asm/tm.h>
#include <asm/asm-prototypes.h>
#include <asm/debug.h>
+#include <asm/hw_breakpoint.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -274,6 +276,8 @@
*/
int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
{
+ unsigned int regs_max;
+
if ((task->thread.regs == NULL) || !data)
return -EIO;
@@ -297,7 +301,9 @@
}
#endif
- if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
+ regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long);
+ if (regno < regs_max) {
+ regno = array_index_nospec(regno, regs_max);
*data = ((unsigned long *)task->thread.regs)[regno];
return 0;
}
@@ -321,6 +327,7 @@
return set_user_dscr(task, data);
if (regno <= PT_MAX_PUT_REG) {
+ regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1);
((unsigned long *)task->thread.regs)[regno] = data;
return 0;
}
@@ -360,10 +367,10 @@
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.regs->orig_gpr3,
offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct pt_regs));
+ sizeof(struct user_pt_regs));
if (!ret)
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct pt_regs), -1);
+ sizeof(struct user_pt_regs), -1);
return ret;
}
@@ -561,6 +568,7 @@
/*
* Copy out only the low-order word of vrsave.
*/
+ int start, end;
union {
elf_vrreg_t reg;
u32 word;
@@ -569,8 +577,10 @@
vrsave.word = target->thread.vrsave;
+ start = 33 * sizeof(vector128);
+ end = start + sizeof(vrsave);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
+ start, end);
}
return ret;
@@ -608,6 +618,7 @@
/*
* We use only the first word of vrsave.
*/
+ int start, end;
union {
elf_vrreg_t reg;
u32 word;
@@ -616,8 +627,10 @@
vrsave.word = target->thread.vrsave;
+ start = 33 * sizeof(vector128);
+ end = start + sizeof(vrsave);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
+ start, end);
if (!ret)
target->thread.vrsave = vrsave.word;
}
@@ -853,10 +866,10 @@
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.ckpt_regs.orig_gpr3,
offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct pt_regs));
+ sizeof(struct user_pt_regs));
if (!ret)
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct pt_regs), -1);
+ sizeof(struct user_pt_regs), -1);
return ret;
}
@@ -1609,7 +1622,7 @@
void *kbuf, void __user *ubuf)
{
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ppr, 0, sizeof(u64));
+ &target->thread.regs->ppr, 0, sizeof(u64));
}
static int ppr_set(struct task_struct *target,
@@ -1618,7 +1631,7 @@
const void *kbuf, const void __user *ubuf)
{
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ppr, 0, sizeof(u64));
+ &target->thread.regs->ppr, 0, sizeof(u64));
}
static int dscr_get(struct task_struct *target,
@@ -3075,7 +3088,7 @@
dbginfo.sizeof_condition = 0;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
- if (cpu_has_feature(CPU_FTR_DAWR))
+ if (dawr_enabled())
dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
#else
dbginfo.features = 0;
@@ -3130,7 +3143,7 @@
case PTRACE_GETREGS: /* Get all pt_regs from the child. */
return copy_regset_to_user(child, &user_ppc_native_view,
REGSET_GPR,
- 0, sizeof(struct pt_regs),
+ 0, sizeof(struct user_pt_regs),
datavp);
#ifdef CONFIG_PPC64
@@ -3139,7 +3152,7 @@
case PTRACE_SETREGS: /* Set all gp regs in the child. */
return copy_regset_from_user(child, &user_ppc_native_view,
REGSET_GPR,
- 0, sizeof(struct pt_regs),
+ 0, sizeof(struct user_pt_regs),
datavp);
case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
@@ -3262,17 +3275,40 @@
*/
long do_syscall_trace_enter(struct pt_regs *regs)
{
+ u32 flags;
+
user_exit();
- /*
- * The tracer may decide to abort the syscall, if so tracehook
- * will return !0. Note that the tracer may also just change
- * regs->gpr[0] to an invalid syscall number, that is handled
- * below on the exit path.
- */
- if (test_thread_flag(TIF_SYSCALL_TRACE) &&
- tracehook_report_syscall_entry(regs))
- goto skip;
+ flags = READ_ONCE(current_thread_info()->flags) &
+ (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
+
+ if (flags) {
+ int rc = tracehook_report_syscall_entry(regs);
+
+ if (unlikely(flags & _TIF_SYSCALL_EMU)) {
+ /*
+ * A nonzero return code from
+ * tracehook_report_syscall_entry() tells us to prevent
+ * the syscall execution, but we are not going to
+ * execute it anyway.
+ *
+ * Returning -1 will skip the syscall execution. We want
+ * to avoid clobbering any registers, so we don't goto
+ * the skip label below.
+ */
+ return -1;
+ }
+
+ if (rc) {
+ /*
+ * The tracer decided to abort the syscall. Note that
+ * the tracer may also just change regs->gpr[0] to an
+ * invalid syscall number, that is handled below on the
+ * exit path.
+ */
+ goto skip;
+ }
+ }
/* Run seccomp after ptrace; allow it to set gpr[3]. */
if (do_seccomp(regs))
@@ -3324,3 +3360,42 @@
user_enter();
}
+
+void __init pt_regs_check(void)
+{
+ BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
+ offsetof(struct user_pt_regs, gpr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
+ offsetof(struct user_pt_regs, nip));
+ BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+ offsetof(struct user_pt_regs, msr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+ offsetof(struct user_pt_regs, msr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct user_pt_regs, orig_gpr3));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
+ offsetof(struct user_pt_regs, ctr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
+ offsetof(struct user_pt_regs, link));
+ BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
+ offsetof(struct user_pt_regs, xer));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
+ offsetof(struct user_pt_regs, ccr));
+#ifdef __powerpc64__
+ BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
+ offsetof(struct user_pt_regs, softe));
+#else
+ BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
+ offsetof(struct user_pt_regs, mq));
+#endif
+ BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
+ offsetof(struct user_pt_regs, trap));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
+ offsetof(struct user_pt_regs, dar));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
+ offsetof(struct user_pt_regs, dsisr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
+ offsetof(struct user_pt_regs, result));
+
+ BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+}
diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S
index f366fed..10e96f3 100644
--- a/arch/powerpc/kernel/reloc_32.S
+++ b/arch/powerpc/kernel/reloc_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Code to process dynamic relocations for PPC32.
*
@@ -5,11 +6,6 @@
* Author: Suzuki Poulose <suzuki@in.ibm.com>
*
* - Based on ppc64 code - reloc_64.S
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/ppc_asm.h>
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
index e8cfc69..02d4719 100644
--- a/arch/powerpc/kernel/reloc_64.S
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Code to process dynamic relocations in the kernel.
*
* Copyright 2008 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/ppc_asm.h>
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8afd146..c5fa251 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
*
* Procedures for interfacing to the RTAS on CHRP machines.
*
* Peter Bergner, IBM March 2001.
* Copyright (C) 2001 IBM.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stdarg.h>
@@ -20,6 +16,7 @@
#include <linux/capability.h>
#include <linux/delay.h>
#include <linux/cpu.h>
+#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -875,15 +872,17 @@
return 0;
for_each_cpu(cpu, cpus) {
+ struct device *dev = get_cpu_device(cpu);
+
switch (state) {
case DOWN:
- cpuret = cpu_down(cpu);
+ cpuret = device_offline(dev);
break;
case UP:
- cpuret = cpu_up(cpu);
+ cpuret = device_online(dev);
break;
}
- if (cpuret) {
+ if (cpuret < 0) {
pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
__func__,
((state == UP) ? "up" : "down"),
@@ -900,6 +899,7 @@
cpumask_clear_cpu(cpu, cpus);
}
}
+ cond_resched();
}
return ret;
@@ -926,13 +926,11 @@
return ret;
}
-EXPORT_SYMBOL(rtas_online_cpus_mask);
int rtas_offline_cpus_mask(cpumask_var_t cpus)
{
return rtas_cpu_state_change_mask(DOWN, cpus);
}
-EXPORT_SYMBOL(rtas_offline_cpus_mask);
int rtas_ibm_suspend_me(u64 handle)
{
@@ -972,6 +970,8 @@
data.token = rtas_token("ibm,suspend-me");
data.complete = &done;
+ lock_device_hotplug();
+
/* All present CPUs must be online */
cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
cpuret = rtas_online_cpus_mask(offline_mask);
@@ -981,20 +981,27 @@
goto out;
}
- stop_topology_update();
+ cpu_hotplug_disable();
+
+ /* Check if we raced with a CPU-Offline Operation */
+ if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) {
+ pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__);
+ atomic_set(&data.error, -EAGAIN);
+ goto out_hotplug_enable;
+ }
/* Call function on all CPUs. One of us will make the
* rtas call
*/
- if (on_each_cpu(rtas_percpu_suspend_me, &data, 0))
- atomic_set(&data.error, -EINVAL);
+ on_each_cpu(rtas_percpu_suspend_me, &data, 0);
wait_for_completion(&done);
if (atomic_read(&data.error) != 0)
printk(KERN_ERR "Error doing global join\n");
- start_topology_update();
+out_hotplug_enable:
+ cpu_hotplug_enable();
/* Take down CPUs not online prior to suspend */
cpuret = rtas_offline_cpus_mask(offline_mask);
@@ -1003,6 +1010,7 @@
__func__);
out:
+ unlock_device_hotplug();
free_cpumask_var(offline_mask);
return atomic_read(&data.error);
}
@@ -1178,7 +1186,11 @@
ibm_suspend_me_token = rtas_token("ibm,suspend-me");
}
#endif
- rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);
+ rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE,
+ 0, rtas_region);
+ if (!rtas_rmo_buf)
+ panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
+ PAGE_SIZE, &rtas_region);
#ifdef CONFIG_RTAS_ERROR_LOGGING
rtas_last_error_token = rtas_token("rtas-last-error");
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 10fabae..84f7947 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* c 2001 PPC 64 Team, IBM Corp
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* /proc/powerpc/rtas/firmware_flash interface
*
* This file implements a firmware_flash interface to pump a firmware
@@ -523,7 +519,7 @@
args_buf->status = VALIDATE_INCOMPLETE;
}
- if (!access_ok(VERIFY_READ, buf, count)) {
+ if (!access_ok(buf, count)) {
rc = -EFAULT;
goto done;
}
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index c2b148b..ae5e43e 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2001 Dave Engebretsen, IBM Corporation
* Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
@@ -5,20 +6,6 @@
* RTAS specific routines for PCI.
*
* Based on code from pci.c, chrp_pci.c and pSeries_pci.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 44d66c3..8d02e04 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Communication to userspace based on kernel/printk.c
*/
@@ -91,6 +87,8 @@
return "Dump Notification Event";
case RTAS_TYPE_PRRN:
return "Platform Resource Reassignment Event";
+ case RTAS_TYPE_HOTPLUG:
+ return "Hotplug Event";
}
return rtas_type[0];
@@ -150,8 +148,10 @@
} else {
struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
- printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n",
- error_log_cnt, rtas_event_type(rtas_error_type(errlog)),
+ printk(RTAS_DEBUG "event: %d, Type: %s (%d), Severity: %d\n",
+ error_log_cnt,
+ rtas_event_type(rtas_error_type(errlog)),
+ rtas_error_type(errlog),
rtas_error_severity(errlog));
}
}
@@ -274,27 +274,16 @@
}
#ifdef CONFIG_PPC_PSERIES
-static s32 prrn_update_scope;
-
-static void prrn_work_fn(struct work_struct *work)
+static void handle_prrn_event(s32 scope)
{
/*
* For PRRN, we must pass the negative of the scope value in
* the RTAS event.
*/
- pseries_devicetree_update(-prrn_update_scope);
+ pseries_devicetree_update(-scope);
numa_update_cpu_topology(false);
}
-static DECLARE_WORK(prrn_work, prrn_work_fn);
-
-static void prrn_schedule_update(u32 scope)
-{
- flush_work(&prrn_work);
- prrn_update_scope = scope;
- schedule_work(&prrn_work);
-}
-
static void handle_rtas_event(const struct rtas_error_log *log)
{
if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled())
@@ -303,7 +292,7 @@
/* For PRRN Events the extended log length is used to denote
* the scope for calling rtas update-nodes.
*/
- prrn_schedule_update(rtas_error_extended_log_length(log));
+ handle_prrn_event(rtas_error_extended_log_length(log));
}
#else
@@ -342,7 +331,7 @@
count = rtas_error_log_buffer_max;
- if (!access_ok(VERIFY_WRITE, buf, count))
+ if (!access_ok(buf, count))
return -EFAULT;
tmp = kmalloc(count, GFP_KERNEL);
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index f6f469f..bd91dce 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -4,6 +4,7 @@
//
// Copyright 2018, Michael Ellerman, IBM Corporation.
+#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/seq_buf.h>
@@ -22,10 +23,15 @@
COUNT_CACHE_FLUSH_SW = 0x2,
COUNT_CACHE_FLUSH_HW = 0x4,
};
-static enum count_cache_flush_type count_cache_flush_type;
+static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
+static bool link_stack_flush_enabled;
bool barrier_nospec_enabled;
static bool no_nospec;
+static bool btb_flush_enabled;
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
+static bool no_spectrev2;
+#endif
static void enable_barrier_nospec(bool enable)
{
@@ -52,7 +58,7 @@
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
- if (!no_nospec)
+ if (!no_nospec && !cpu_mitigations_off())
enable_barrier_nospec(enable);
}
@@ -99,8 +105,36 @@
return 0;
}
device_initcall(barrier_nospec_debugfs_init);
+
+static __init int security_feature_debugfs_init(void)
+{
+ debugfs_create_x64("security_features", 0400, powerpc_debugfs_root,
+ (u64 *)&powerpc_security_features);
+ return 0;
+}
+device_initcall(security_feature_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
+static int __init handle_nospectre_v2(char *p)
+{
+ no_spectrev2 = true;
+
+ return 0;
+}
+early_param("nospectre_v2", handle_nospectre_v2);
+#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+void setup_spectre_v2(void)
+{
+ if (no_spectrev2 || cpu_mitigations_off())
+ do_btb_flush_fixups();
+ else
+ btb_flush_enabled = true;
+}
+#endif /* CONFIG_PPC_FSL_BOOK3E */
+
#ifdef CONFIG_PPC_BOOK3S_64
ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
{
@@ -168,31 +202,35 @@
bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
- if (bcs || ccd || count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
- bool comma = false;
+ if (bcs || ccd) {
seq_buf_printf(&s, "Mitigation: ");
- if (bcs) {
+ if (bcs)
seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
- comma = true;
- }
- if (ccd) {
- if (comma)
- seq_buf_printf(&s, ", ");
- seq_buf_printf(&s, "Indirect branch cache disabled");
- comma = true;
- }
-
- if (comma)
+ if (bcs && ccd)
seq_buf_printf(&s, ", ");
- seq_buf_printf(&s, "Software count cache flush");
+ if (ccd)
+ seq_buf_printf(&s, "Indirect branch cache disabled");
+
+ if (link_stack_flush_enabled)
+ seq_buf_printf(&s, ", Software link stack flush");
+
+ } else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
+ seq_buf_printf(&s, "Mitigation: Software count cache flush");
if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW)
- seq_buf_printf(&s, "(hardware accelerated)");
- } else
+ seq_buf_printf(&s, " (hardware accelerated)");
+
+ if (link_stack_flush_enabled)
+ seq_buf_printf(&s, ", Software link stack flush");
+
+ } else if (btb_flush_enabled) {
+ seq_buf_printf(&s, "Mitigation: Branch predictor state flush");
+ } else {
seq_buf_printf(&s, "Vulnerable");
+ }
seq_buf_printf(&s, "\n");
@@ -282,7 +320,7 @@
stf_enabled_flush_types = type;
- if (!no_stf_barrier)
+ if (!no_stf_barrier && !cpu_mitigations_off())
stf_barrier_enable(enable);
}
@@ -348,18 +386,49 @@
device_initcall(stf_barrier_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
+static void no_count_cache_flush(void)
+{
+ count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
+ pr_info("count-cache-flush: software flush disabled.\n");
+}
+
static void toggle_count_cache_flush(bool enable)
{
- if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+ if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) &&
+ !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK))
+ enable = false;
+
+ if (!enable) {
patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP);
- count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
- pr_info("count-cache-flush: software flush disabled.\n");
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP);
+#endif
+ pr_info("link-stack-flush: software flush disabled.\n");
+ link_stack_flush_enabled = false;
+ no_count_cache_flush();
return;
}
+ // This enables the branch from _switch to flush_count_cache
patch_branch_site(&patch__call_flush_count_cache,
(u64)&flush_count_cache, BRANCH_SET_LINK);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ // This enables the branch from guest_exit_cont to kvm_flush_link_stack
+ patch_branch_site(&patch__call_kvm_flush_link_stack,
+ (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+#endif
+
+ pr_info("link-stack-flush: software flush enabled.\n");
+ link_stack_flush_enabled = true;
+
+ // If we just need to flush the link stack, patch an early return
+ if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+ patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR);
+ no_count_cache_flush();
+ return;
+ }
+
if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
count_cache_flush_type = COUNT_CACHE_FLUSH_SW;
pr_info("count-cache-flush: full software flush sequence enabled.\n");
@@ -373,7 +442,26 @@
void setup_count_cache_flush(void)
{
- toggle_count_cache_flush(true);
+ bool enable = true;
+
+ if (no_spectrev2 || cpu_mitigations_off()) {
+ if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) ||
+ security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
+ pr_warn("Spectre v2 mitigations not fully under software control, can't disable\n");
+
+ enable = false;
+ }
+
+ /*
+ * There's no firmware feature flag/hypervisor bit to tell us we need to
+ * flush the link stack on context switch. So we set it here if we see
+ * either of the Spectre v2 mitigations that aim to protect userspace.
+ */
+ if (security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED) ||
+ security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE))
+ security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
+
+ toggle_count_cache_flush(enable);
}
#ifdef CONFIG_DEBUG_FS
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 93fa0c9..25aaa39 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Common boot and setup code for both 32-bit and 64-bit.
* Extracted from arch/powerpc/kernel/setup_64.c.
*
* Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
@@ -67,6 +63,7 @@
#include <asm/livepatch.h>
#include <asm/mmu_context.h>
#include <asm/cpu_has_feature.h>
+#include <asm/kasan.h>
#include "setup.h"
@@ -133,13 +130,11 @@
/* also used by kexec */
void machine_shutdown(void)
{
-#ifdef CONFIG_FA_DUMP
/*
* if fadump is active, cleanup the fadump registration before we
* shutdown.
*/
fadump_cleanup();
-#endif
if (ppc_md.machine_shutdown)
ppc_md.machine_shutdown();
@@ -200,14 +195,15 @@
{
struct device_node *root;
const char *model = NULL;
-#if defined(CONFIG_SMP) && defined(CONFIG_PPC32)
unsigned long bogosum = 0;
int i;
- for_each_online_cpu(i)
- bogosum += loops_per_jiffy;
- seq_printf(m, "total bogomips\t: %lu.%02lu\n",
- bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
-#endif /* CONFIG_SMP && CONFIG_PPC32 */
+
+ if (IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_PPC32)) {
+ for_each_online_cpu(i)
+ bogosum += loops_per_jiffy;
+ seq_printf(m, "total bogomips\t: %lu.%02lu\n",
+ bogosum / (500000 / HZ), bogosum / (5000 / HZ) % 100);
+ }
seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq);
if (ppc_md.name)
seq_printf(m, "platform\t: %s\n", ppc_md.name);
@@ -221,11 +217,10 @@
if (ppc_md.show_cpuinfo != NULL)
ppc_md.show_cpuinfo(m);
-#ifdef CONFIG_PPC32
/* Display the amount of memory */
- seq_printf(m, "Memory\t\t: %d MB\n",
- (unsigned int)(total_memory / (1024 * 1024)));
-#endif
+ if (IS_ENABLED(CONFIG_PPC32))
+ seq_printf(m, "Memory\t\t: %d MB\n",
+ (unsigned int)(total_memory / (1024 * 1024)));
}
static int show_cpuinfo(struct seq_file *m, void *v)
@@ -252,26 +247,24 @@
else
seq_printf(m, "unknown (%08x)", pvr);
-#ifdef CONFIG_ALTIVEC
if (cpu_has_feature(CPU_FTR_ALTIVEC))
seq_printf(m, ", altivec supported");
-#endif /* CONFIG_ALTIVEC */
seq_printf(m, "\n");
#ifdef CONFIG_TAU
- if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) {
-#ifdef CONFIG_TAU_AVERAGE
- /* more straightforward, but potentially misleading */
- seq_printf(m, "temperature \t: %u C (uncalibrated)\n",
- cpu_temp(cpu_id));
-#else
- /* show the actual temp sensor range */
- u32 temp;
- temp = cpu_temp_both(cpu_id);
- seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
- temp & 0xff, temp >> 16);
-#endif
+ if (cpu_has_feature(CPU_FTR_TAU)) {
+ if (IS_ENABLED(CONFIG_TAU_AVERAGE)) {
+ /* more straightforward, but potentially misleading */
+ seq_printf(m, "temperature \t: %u C (uncalibrated)\n",
+ cpu_temp(cpu_id));
+ } else {
+ /* show the actual temp sensor range */
+ u32 temp;
+ temp = cpu_temp_both(cpu_id);
+ seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
+ temp & 0xff, temp >> 16);
+ }
}
#endif /* CONFIG_TAU */
@@ -335,11 +328,10 @@
seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n",
maj, min, PVR_VER(pvr), PVR_REV(pvr));
-#ifdef CONFIG_PPC32
- seq_printf(m, "bogomips\t: %lu.%02lu\n",
- loops_per_jiffy / (500000/HZ),
- (loops_per_jiffy / (5000/HZ)) % 100);
-#endif
+ if (IS_ENABLED(CONFIG_PPC32))
+ seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ),
+ (loops_per_jiffy / (5000 / HZ)) % 100);
+
seq_printf(m, "\n");
/* If this is the last cpu, print the summary */
@@ -401,8 +393,8 @@
#ifdef CONFIG_SMP
-int threads_per_core, threads_per_subcore, threads_shift;
-cpumask_t threads_core_mask;
+int threads_per_core, threads_per_subcore, threads_shift __read_mostly;
+cpumask_t threads_core_mask __read_mostly;
EXPORT_SYMBOL_GPL(threads_per_core);
EXPORT_SYMBOL_GPL(threads_per_subcore);
EXPORT_SYMBOL_GPL(threads_shift);
@@ -459,9 +451,11 @@
DBG("smp_setup_cpu_maps()\n");
- cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32),
- __alignof__(u32)));
- memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32));
+ cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
+ __alignof__(u32));
+ if (!cpu_to_phys_id)
+ panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
+ __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
@@ -635,7 +629,7 @@
}
/* What can we do if we didn't find ? */
if (machine_id >= &__machine_desc_end) {
- DBG("No suitable machine found !\n");
+ pr_err("No suitable machine description found !\n");
for (;;);
}
@@ -688,7 +682,7 @@
return ret;
parent = of_get_parent(np);
if (parent) {
- if (strcmp(parent->type, "isa") == 0)
+ if (of_node_is_type(parent, "isa"))
ret = 0;
of_node_put(parent);
}
@@ -738,23 +732,19 @@
* BUG() in that case.
*/
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define KERNEL_COHERENCY 0
-#else
-#define KERNEL_COHERENCY 1
-#endif
+#define KERNEL_COHERENCY (!IS_ENABLED(CONFIG_NOT_COHERENT_CACHE))
static int __init check_cache_coherency(void)
{
struct device_node *np;
const void *prop;
- int devtree_coherency;
+ bool devtree_coherency;
np = of_find_node_by_path("/");
prop = of_get_property(np, "coherency-off", NULL);
of_node_put(np);
- devtree_coherency = prop ? 0 : 1;
+ devtree_coherency = prop ? false : true;
if (devtree_coherency != KERNEL_COHERENCY) {
printk(KERN_ERR
@@ -788,22 +778,9 @@
pr_info("%s\n", s);
}
-void arch_setup_pdev_archdata(struct platform_device *pdev)
-{
- pdev->archdata.dma_mask = DMA_BIT_MASK(32);
- pdev->dev.dma_mask = &pdev->archdata.dma_mask;
- set_dma_ops(&pdev->dev, &dma_nommu_ops);
-}
-
static __init void print_system_info(void)
{
pr_info("-----------------------------------------------------\n");
-#ifdef CONFIG_PPC_BOOK3S_64
- pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
-#endif
-#ifdef CONFIG_PPC_STD_MMU_32
- pr_info("Hash_size = 0x%lx\n", Hash_size);
-#endif
pr_info("phys_mem_size = 0x%llx\n",
(unsigned long long)memblock_phys_mem_size());
@@ -823,20 +800,15 @@
pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
#ifdef CONFIG_PPC64
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+#ifdef CONFIG_PPC_BOOK3S
+ pr_info("vmalloc start = 0x%lx\n", KERN_VIRT_START);
+ pr_info("IO start = 0x%lx\n", KERN_IO_START);
+ pr_info("vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
+#endif
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- if (htab_address)
- pr_info("htab_address = 0x%p\n", htab_address);
- if (htab_hash_mask)
- pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
-#endif
-#ifdef CONFIG_PPC_STD_MMU_32
- if (Hash)
- pr_info("Hash = 0x%p\n", Hash);
- if (Hash_mask)
- pr_info("Hash_mask = 0x%lx\n", Hash_mask);
-#endif
+ if (!early_radix_enabled())
+ print_system_hash_info();
if (PHYSICAL_START > 0)
pr_info("physical_start = 0x%llx\n",
@@ -867,6 +839,8 @@
*/
void __init setup_arch(char **cmdline_p)
{
+ kasan_init();
+
*cmdline_p = boot_command_line;
/* Set a half-reasonable default so udelay does something sensible */
@@ -939,50 +913,38 @@
/* Reserve large chunks of memory for use by CMA for KVM. */
kvm_cma_reserve();
- klp_init_thread_info(&init_thread_info);
+ klp_init_thread_info(&init_task);
init_mm.start_code = (unsigned long)_stext;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = klimit;
-#ifdef CONFIG_PPC_MM_SLICES
-#ifdef CONFIG_PPC64
- if (!radix_enabled())
- init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
-#elif defined(CONFIG_PPC_8xx)
- init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
-#else
-#error "context.addr_limit not initialized."
-#endif
-#endif
-
-#ifdef CONFIG_SPAPR_TCE_IOMMU
mm_iommu_init(&init_mm);
-#endif
irqstack_early_init();
exc_lvl_early_init();
emergency_stack_init();
initmem_init();
-#ifdef CONFIG_DUMMY_CONSOLE
- conswitchp = &dummy_con;
-#endif
+ early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
+
+ if (IS_ENABLED(CONFIG_DUMMY_CONSOLE))
+ conswitchp = &dummy_con;
+
if (ppc_md.setup_arch)
ppc_md.setup_arch();
setup_barrier_nospec();
+ setup_spectre_v2();
paging_init();
/* Initialize the MMU context management stuff. */
mmu_context_init();
-#ifdef CONFIG_PPC64
/* Interrupt code needs to be 64K-aligned. */
- if ((unsigned long)_stext & 0xffff)
+ if (IS_ENABLED(CONFIG_PPC64) && (unsigned long)_stext & 0xffff)
panic("Kernelbase not 64K-aligned (0x%lx)!\n",
(unsigned long)_stext);
-#endif
}
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index c6a592b..c82577c 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Prototypes for functions that are shared between setup_(32|64|common).c
*
* Copyright 2016 Michael Ellerman, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef __ARCH_POWERPC_KERNEL_SETUP_H
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 8c507be..a7541ed 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Common prep/pmac/chrp boot and setup code.
*/
@@ -17,6 +18,7 @@
#include <linux/console.h>
#include <linux/memblock.h>
#include <linux/export.h>
+#include <linux/nvram.h>
#include <asm/io.h>
#include <asm/prom.h>
@@ -59,39 +61,10 @@
unsigned int DMA_MODE_READ;
unsigned int DMA_MODE_WRITE;
-EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
EXPORT_SYMBOL(DMA_MODE_READ);
EXPORT_SYMBOL(DMA_MODE_WRITE);
/*
- * We're called here very early in the boot.
- *
- * Note that the kernel may be running at an address which is different
- * from the address that it was linked at, so we must use RELOC/PTRRELOC
- * to access static data (including strings). -- paulus
- */
-notrace unsigned long __init early_init(unsigned long dt_ptr)
-{
- unsigned long offset = reloc_offset();
-
- /* First zero the BSS -- use memset_io, some platforms don't have
- * caches on yet */
- memset_io((void __iomem *)PTRRELOC(&__bss_start), 0,
- __bss_stop - __bss_start);
-
- /*
- * Identify the CPU type and fix up code sections
- * that depend on which cpu we have.
- */
- identify_cpu(offset, mfspr(SPRN_PVR));
-
- apply_feature_fixups();
-
- return KERNELBASE + offset;
-}
-
-
-/*
* This is run before start_kernel(), the kernel has been relocated
* and we are running with enough of the MMU enabled to have our
* proper kernel virtual addresses
@@ -101,8 +74,7 @@
*/
notrace void __init machine_init(u64 dt_ptr)
{
- unsigned int *addr = (unsigned int *)((unsigned long)&patch__memset_nocache +
- patch__memset_nocache);
+ unsigned int *addr = (unsigned int *)patch_site_addr(&patch__memset_nocache);
unsigned long insn;
/* Configure static keys first, now that we're relocated. */
@@ -149,41 +121,6 @@
}
__setup("l3cr=", ppc_setup_l3cr);
-#ifdef CONFIG_GENERIC_NVRAM
-
-/* Generic nvram hooks used by drivers/char/gen_nvram.c */
-unsigned char nvram_read_byte(int addr)
-{
- if (ppc_md.nvram_read_val)
- return ppc_md.nvram_read_val(addr);
- return 0xff;
-}
-EXPORT_SYMBOL(nvram_read_byte);
-
-void nvram_write_byte(unsigned char val, int addr)
-{
- if (ppc_md.nvram_write_val)
- ppc_md.nvram_write_val(addr, val);
-}
-EXPORT_SYMBOL(nvram_write_byte);
-
-ssize_t nvram_get_size(void)
-{
- if (ppc_md.nvram_size)
- return ppc_md.nvram_size();
- return -1;
-}
-EXPORT_SYMBOL(nvram_get_size);
-
-void nvram_sync(void)
-{
- if (ppc_md.nvram_sync)
- ppc_md.nvram_sync();
-}
-EXPORT_SYMBOL(nvram_sync);
-
-#endif /* CONFIG_NVRAM */
-
static int __init ppc_init(void)
{
/* clear the progress line */
@@ -198,6 +135,17 @@
}
arch_initcall(ppc_init);
+static void *__init alloc_stack(void)
+{
+ void *ptr = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+
+ if (!ptr)
+ panic("cannot allocate %d bytes for stack at %pS\n",
+ THREAD_SIZE, (void *)_RET_IP_);
+
+ return ptr;
+}
+
void __init irqstack_early_init(void)
{
unsigned int i;
@@ -205,10 +153,8 @@
/* interrupt stacks must be in lowmem, we get that for free on ppc32
* as the memblock is limited to lowmem by default */
for_each_possible_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
- hardirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ softirq_ctx[i] = alloc_stack();
+ hardirq_ctx[i] = alloc_stack();
}
}
@@ -226,13 +172,10 @@
hw_cpu = 0;
#endif
- critirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ critirq_ctx[hw_cpu] = alloc_stack();
#ifdef CONFIG_BOOKE
- dbgirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
- mcheckirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ dbgirq_ctx[hw_cpu] = alloc_stack();
+ mcheckirq_ctx[hw_cpu] = alloc_stack();
#endif
}
}
@@ -240,7 +183,7 @@
void __init setup_power_save(void)
{
-#ifdef CONFIG_6xx
+#ifdef CONFIG_PPC_BOOK3S_32
if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
cpu_has_feature(CPU_FTR_CAN_NAP))
ppc_md.power_save = ppc6xx_idle;
@@ -263,6 +206,6 @@
dcache_bsize = cur_cpu_spec->dcache_bsize;
icache_bsize = cur_cpu_spec->icache_bsize;
ucache_bsize = 0;
- if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601) || IS_ENABLED(CONFIG_E200))
ucache_bsize = icache_bsize = dcache_bsize;
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index faf0022..44b4c43 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
*
* Common boot and setup code.
*
* Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/export.h>
@@ -29,10 +25,9 @@
#include <linux/unistd.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/pci.h>
#include <linux/lockdep.h>
-#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/nmi.h>
@@ -69,6 +64,7 @@
#include <asm/cputhreads.h>
#include <asm/hw_irq.h>
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
#include "setup.h"
@@ -332,6 +328,12 @@
*/
configure_exceptions();
+ /*
+ * Configure Kernel Userspace Protection. This needs to happen before
+ * feature fixups for platforms that implement this using features.
+ */
+ setup_kup();
+
/* Apply all the dynamic patching */
apply_feature_fixups();
setup_feature_keys();
@@ -384,6 +386,9 @@
/* Initialize the hash table or TLB handling */
early_init_mmu_secondary();
+ /* Perform any KUP setup that is per-cpu */
+ setup_kup();
+
/*
* At this point, we can let interrupts switch to virtual mode
* (the MMU has been setup), so adjust the MSR in the PACA to
@@ -635,17 +640,17 @@
static void *__init alloc_stack(unsigned long limit, int cpu)
{
- unsigned long pa;
+ void *ptr;
- pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
- early_cpu_to_node(cpu), MEMBLOCK_NONE);
- if (!pa) {
- pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
- if (!pa)
- panic("cannot allocate stacks");
- }
+ BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
- return __va(pa);
+ ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_SIZE,
+ MEMBLOCK_LOW_LIMIT, limit,
+ early_cpu_to_node(cpu));
+ if (!ptr)
+ panic("cannot allocate stacks");
+
+ return ptr;
}
void __init irqstack_early_init(void)
@@ -691,24 +696,6 @@
#endif
/*
- * Emergency stacks are used for a range of things, from asynchronous
- * NMIs (system reset, machine check) to synchronous, process context.
- * We set preempt_count to zero, even though that isn't necessarily correct. To
- * get the right value we'd need to copy it from the previous thread_info, but
- * doing that might fault causing more problems.
- * TODO: what to do with accounting?
- */
-static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
-{
- ti->task = NULL;
- ti->cpu = cpu;
- ti->preempt_count = 0;
- ti->local_flags = 0;
- ti->flags = 0;
- klp_init_thread_info(ti);
-}
-
-/*
* Stack space used when we detect a bad kernel stack pointer, and
* early in SMP boots before relocation is enabled. Exclusive emergency
* stack for machine checks.
@@ -735,25 +722,14 @@
limit = min(ppc64_bolted_size(), ppc64_rma_size);
for_each_possible_cpu(i) {
- struct thread_info *ti;
-
- ti = alloc_stack(limit, i);
- memset(ti, 0, THREAD_SIZE);
- emerg_stack_init_thread_info(ti, i);
- paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
- ti = alloc_stack(limit, i);
- memset(ti, 0, THREAD_SIZE);
- emerg_stack_init_thread_info(ti, i);
- paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
- ti = alloc_stack(limit, i);
- memset(ti, 0, THREAD_SIZE);
- emerg_stack_init_thread_info(ti, i);
- paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->mc_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
#endif
}
}
@@ -763,13 +739,15 @@
static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
- return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
- __pa(MAX_DMA_ADDRESS));
+ return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
+ MEMBLOCK_ALLOC_ACCESSIBLE,
+ early_cpu_to_node(cpu));
+
}
static void __init pcpu_fc_free(void *ptr, size_t size)
{
- free_bootmem(__pa(ptr), size);
+ memblock_free(__pa(ptr), size);
}
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
@@ -930,8 +908,13 @@
* hardware prefetch runoff. We don't have a recipe for load patterns to
* reliably avoid the prefetcher.
*/
- l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
- memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+ l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
+ l1d_size, MEMBLOCK_LOW_LIMIT,
+ limit, NUMA_NO_NODE);
+ if (!l1d_flush_fallback_area)
+ panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
+ __func__, l1d_size * 2, l1d_size, &limit);
+
for_each_possible_cpu(cpu) {
struct paca_struct *paca = paca_ptrs[cpu];
@@ -955,7 +938,7 @@
enabled_flush_types = types;
- if (!no_rfi_flush)
+ if (!no_rfi_flush && !cpu_mitigations_off())
rfi_flush_enable(enable);
}
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index b3e8db3..e6c30ce 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -44,7 +44,7 @@
newsp = (oldsp - frame_size) & ~0xFUL;
/* Check access */
- if (!access_ok(VERIFY_WRITE, (void __user *)newsp, oldsp - newsp))
+ if (!access_ok((void __user *)newsp, oldsp - newsp))
return NULL;
return (void __user *)newsp;
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index e6474a4..98600b2 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Signal handling for 32bit PPC and 32bit tasks on 64bit PPC
*
@@ -10,11 +11,6 @@
* Derived from "arch/i386/kernel/signal.c"
* Copyright (C) 1991, 1992 Linus Torvalds
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/sched.h>
@@ -470,9 +466,9 @@
return 1;
if (sigret) {
- /* Set up the sigreturn trampoline: li r0,sigret; sc */
- if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
- || __put_user(0x44000002UL, &frame->tramp[1]))
+ /* Set up the sigreturn trampoline: li 0,sigret; sc */
+ if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0])
+ || __put_user(PPC_INST_SC, &frame->tramp[1]))
return 1;
flush_icache_range((unsigned long) &frame->tramp[0],
(unsigned long) &frame->tramp[2]);
@@ -619,9 +615,9 @@
if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
return 1;
if (sigret) {
- /* Set up the sigreturn trampoline: li r0,sigret; sc */
- if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
- || __put_user(0x44000002UL, &frame->tramp[1]))
+ /* Set up the sigreturn trampoline: li 0,sigret; sc */
+ if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0])
+ || __put_user(PPC_INST_SC, &frame->tramp[1]))
return 1;
flush_icache_range((unsigned long) &frame->tramp[0],
(unsigned long) &frame->tramp[2]);
@@ -848,7 +844,23 @@
/* If TM bits are set to the reserved value, it's an invalid context */
if (MSR_TM_RESV(msr_hi))
return 1;
- /* Pull in the MSR TM bits from the user context */
+
+ /*
+ * Disabling preemption, since it is unsafe to be preempted
+ * with MSR[TS] set without recheckpointing.
+ */
+ preempt_disable();
+
+ /*
+ * CAUTION:
+ * After regs->MSR[TS] being updated, make sure that get_user(),
+ * put_user() or similar functions are *not* called. These
+ * functions can generate page faults which will cause the process
+ * to be de-scheduled with MSR[TS] set but without calling
+ * tm_recheckpoint(). This can cause a bug.
+ *
+ * Pull in the MSR TM bits from the user context
+ */
regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK);
/* Now, recheckpoint. This loads up all of the checkpointed (older)
* registers, including FP and V[S]Rs. After recheckpointing, the
@@ -873,6 +885,8 @@
}
#endif
+ preempt_enable();
+
return 0;
}
#endif
@@ -999,7 +1013,7 @@
#else
if (__get_user(mcp, &ucp->uc_regs))
return -EFAULT;
- if (!access_ok(VERIFY_READ, mcp, sizeof(*mcp)))
+ if (!access_ok(mcp, sizeof(*mcp)))
return -EFAULT;
#endif
set_current_blocked(&set);
@@ -1102,7 +1116,7 @@
*/
mctx = (struct mcontext __user *)
((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
- if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
+ if (!access_ok(old_ctx, ctx_size)
|| save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region)
|| put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked)
|| __put_user(to_user_ptr(mctx), &old_ctx->uc_regs))
@@ -1110,7 +1124,7 @@
}
if (new_ctx == NULL)
return 0;
- if (!access_ok(VERIFY_READ, new_ctx, ctx_size) ||
+ if (!access_ok(new_ctx, ctx_size) ||
fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
return -EFAULT;
@@ -1140,18 +1154,18 @@
{
struct rt_sigframe __user *rt_sf;
struct pt_regs *regs = current_pt_regs();
+ int tm_restore = 0;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
struct ucontext __user *uc_transact;
unsigned long msr_hi;
unsigned long tmp;
- int tm_restore = 0;
#endif
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
rt_sf = (struct rt_sigframe __user *)
(regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
- if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf)))
+ if (!access_ok(rt_sf, sizeof(*rt_sf)))
goto bad;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -1184,6 +1198,9 @@
goto bad;
if (MSR_TM_ACTIVE(msr_hi<<32)) {
+ /* Trying to start TM on non TM system */
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto bad;
/* We only recheckpoint on return if we're
* transaction.
*/
@@ -1192,11 +1209,19 @@
goto bad;
}
}
- if (!tm_restore)
- /* Fall through, for non-TM restore */
+ if (!tm_restore) {
+ /*
+ * Unset regs->msr because ucontext MSR TS is not
+ * set, and recheckpoint was not called. This avoid
+ * hitting a TM Bad thing at RFID
+ */
+ regs->msr &= ~MSR_TS_MASK;
+ }
+ /* Fall through, for non-TM restore */
#endif
- if (do_setcontext(&rt_sf->uc, regs, 1))
- goto bad;
+ if (!tm_restore)
+ if (do_setcontext(&rt_sf->uc, regs, 1))
+ goto bad;
/*
* It's not clear whether or why it is desirable to save the
@@ -1223,7 +1248,7 @@
current->comm, current->pid,
rt_sf, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -1289,7 +1314,7 @@
current->thread.debug.dbcr0 = new_dbcr0;
#endif
- if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx)) ||
+ if (!access_ok(ctx, sizeof(*ctx)) ||
fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx)))
return -EFAULT;
@@ -1312,7 +1337,7 @@
current->comm, current->pid,
ctx, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
goto out;
}
@@ -1474,7 +1499,7 @@
{
sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
addr = sr;
- if (!access_ok(VERIFY_READ, sr, sizeof(*sr))
+ if (!access_ok(sr, sizeof(*sr))
|| restore_user_regs(regs, sr, 1))
goto badframe;
}
@@ -1490,6 +1515,6 @@
current->comm, current->pid,
addr, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 83d51bf..1175155 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -5,11 +6,6 @@
* Derived from "arch/i386/kernel/signal.c"
* Copyright (C) 1991, 1992 Linus Torvalds
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/sched.h>
@@ -383,7 +379,7 @@
err |= __get_user(v_regs, &sc->v_regs);
if (err)
return err;
- if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128)))
+ if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
return -EFAULT;
/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
if (v_regs != NULL && (msr & MSR_VEC) != 0) {
@@ -467,20 +463,6 @@
if (MSR_TM_RESV(msr))
return -EINVAL;
- /* pull in MSR TS bits from user context */
- regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
-
- /*
- * Ensure that TM is enabled in regs->msr before we leave the signal
- * handler. It could be the case that (a) user disabled the TM bit
- * through the manipulation of the MSR bits in uc_mcontext or (b) the
- * TM bit was disabled because a sufficient number of context switches
- * happened whilst in the signal handler and load_tm overflowed,
- * disabling the TM bit. In either case we can end up with an illegal
- * TM state leading to a TM Bad Thing when we return to userspace.
- */
- regs->msr |= MSR_TM;
-
/* pull in MSR LE from user context */
regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
@@ -516,10 +498,9 @@
err |= __get_user(tm_v_regs, &tm_sc->v_regs);
if (err)
return err;
- if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128)))
+ if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
return -EFAULT;
- if (tm_v_regs && !access_ok(VERIFY_READ,
- tm_v_regs, 34 * sizeof(vector128)))
+ if (tm_v_regs && !access_ok(tm_v_regs, 34 * sizeof(vector128)))
return -EFAULT;
/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
if (v_regs != NULL && tm_v_regs != NULL && (msr & MSR_VEC) != 0) {
@@ -572,6 +553,34 @@
tm_enable();
/* Make sure the transaction is marked as failed */
tsk->thread.tm_texasr |= TEXASR_FS;
+
+ /*
+ * Disabling preemption, since it is unsafe to be preempted
+ * with MSR[TS] set without recheckpointing.
+ */
+ preempt_disable();
+
+ /* pull in MSR TS bits from user context */
+ regs->msr |= msr & MSR_TS_MASK;
+
+ /*
+ * Ensure that TM is enabled in regs->msr before we leave the signal
+ * handler. It could be the case that (a) user disabled the TM bit
+ * through the manipulation of the MSR bits in uc_mcontext or (b) the
+ * TM bit was disabled because a sufficient number of context switches
+ * happened whilst in the signal handler and load_tm overflowed,
+ * disabling the TM bit. In either case we can end up with an illegal
+ * TM state leading to a TM Bad Thing when we return to userspace.
+ *
+ * CAUTION:
+ * After regs->MSR[TS] being updated, make sure that get_user(),
+ * put_user() or similar functions are *not* called. These
+ * functions can generate page faults which will cause the process
+ * to be de-scheduled with MSR[TS] set but without calling
+ * tm_recheckpoint(). This can cause a bug.
+ */
+ regs->msr |= MSR_TM;
+
/* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&tsk->thread);
@@ -585,6 +594,8 @@
regs->msr |= MSR_VEC;
}
+ preempt_enable();
+
return err;
}
#endif
@@ -598,11 +609,12 @@
long err = 0;
/* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */
- err |= __put_user(0x38210000UL | (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]);
+ err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) |
+ (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]);
/* li r0, __NR_[rt_]sigreturn| */
- err |= __put_user(0x38000000UL | (syscall & 0xffff), &tramp[1]);
+ err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[1]);
/* sc */
- err |= __put_user(0x44000002UL, &tramp[2]);
+ err |= __put_user(PPC_INST_SC, &tramp[2]);
/* Minimal traceback info */
for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++)
@@ -654,7 +666,7 @@
ctx_has_vsx_region = 1;
if (old_ctx != NULL) {
- if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
+ if (!access_ok(old_ctx, ctx_size)
|| setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
ctx_has_vsx_region)
|| __copy_to_user(&old_ctx->uc_sigmask,
@@ -663,7 +675,7 @@
}
if (new_ctx == NULL)
return 0;
- if (!access_ok(VERIFY_READ, new_ctx, ctx_size)
+ if (!access_ok(new_ctx, ctx_size)
|| __get_user(tmp, (u8 __user *) new_ctx)
|| __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
return -EFAULT;
@@ -708,7 +720,7 @@
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
- if (!access_ok(VERIFY_READ, uc, sizeof(*uc)))
+ if (!access_ok(uc, sizeof(*uc)))
goto badframe;
if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
@@ -729,22 +741,65 @@
if (MSR_TM_SUSPENDED(mfmsr()))
tm_reclaim_current(0);
+ /*
+ * Disable MSR[TS] bit also, so, if there is an exception in the
+ * code below (as a page fault in copy_ckvsx_to_user()), it does
+ * not recheckpoint this task if there was a context switch inside
+ * the exception.
+ *
+ * A major page fault can indirectly call schedule(). A reschedule
+ * process in the middle of an exception can have a side effect
+ * (Changing the CPU MSR[TS] state), since schedule() is called
+ * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended
+ * (switch_to() calls tm_recheckpoint() for the 'new' process). In
+ * this case, the process continues to be the same in the CPU, but
+ * the CPU state just changed.
+ *
+ * This can cause a TM Bad Thing, since the MSR in the stack will
+ * have the MSR[TS]=0, and this is what will be used to RFID.
+ *
+ * Clearing MSR[TS] state here will avoid a recheckpoint if there
+ * is any process reschedule in kernel space. The MSR[TS] state
+ * does not need to be saved also, since it will be replaced with
+ * the MSR[TS] that came from user context later, at
+ * restore_tm_sigcontexts.
+ */
+ regs->msr &= ~MSR_TS_MASK;
+
if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
goto badframe;
if (MSR_TM_ACTIVE(msr)) {
/* We recheckpoint on return. */
struct ucontext __user *uc_transact;
+
+ /* Trying to start TM on non TM system */
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto badframe;
+
if (__get_user(uc_transact, &uc->uc_link))
goto badframe;
if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
&uc_transact->uc_mcontext))
goto badframe;
- }
- else
- /* Fall through, for non-TM restore */
+ } else
#endif
- if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext))
- goto badframe;
+ {
+ /*
+ * Fall through, for non-TM restore
+ *
+ * Unset MSR[TS] on the thread regs since MSR from user
+ * context does not have MSR active, and recheckpoint was
+ * not called since restore_tm_sigcontexts() was not called
+ * also.
+ *
+ * If not unsetting it, the code can RFID to userspace with
+ * MSR[TS] set, but without CPU in the proper state,
+ * causing a TM bad thing.
+ */
+ current->thread.regs->msr &= ~MSR_TS_MASK;
+ if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext))
+ goto badframe;
+ }
if (restore_altstack(&uc->uc_stack))
goto badframe;
@@ -758,7 +813,7 @@
current->comm, current->pid, "rt_sigreturn",
(long)uc, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 61c1fad..ea6adbf 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SMP support for ppc.
*
@@ -8,11 +9,6 @@
*
* PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
* Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
@@ -20,6 +16,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched/mm.h>
+#include <linux/sched/task_stack.h>
#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
@@ -34,6 +31,8 @@
#include <linux/topology.h>
#include <linux/profile.h>
#include <linux/processor.h>
+#include <linux/random.h>
+#include <linux/stackprotector.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
@@ -73,15 +72,33 @@
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif
-struct thread_info *secondary_ti;
+struct task_struct *secondary_current;
+bool has_big_cores;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL_GPL(has_big_cores);
+
+#define MAX_THREAD_LIST_SIZE 8
+#define THREAD_GROUP_SHARE_L1 1
+struct thread_groups {
+ unsigned int property;
+ unsigned int nr_groups;
+ unsigned int threads_per_group;
+ unsigned int thread_list[MAX_THREAD_LIST_SIZE];
+};
+
+/*
+ * On big-cores system, cpu_l1_cache_map for each CPU corresponds to
+ * the set its siblings that share the L1-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
/* SMP operations for this machine */
struct smp_ops_t *smp_ops;
@@ -338,13 +355,12 @@
* NMI IPIs may not be recoverable, so should not be used as ongoing part of
* a running system. They can be used for crash, debug, halt/reboot, etc.
*
- * NMI IPIs are globally single threaded. No more than one in progress at
- * any time.
- *
* The IPI call waits with interrupts disabled until all targets enter the
- * NMI handler, then the call returns.
+ * NMI handler, then returns. Subsequent IPIs can be issued before targets
+ * have returned from their handlers, so there is no guarantee about
+ * concurrency or re-entrancy.
*
- * No new NMI can be initiated until targets exit the handler.
+ * A new NMI can be issued before all targets exit the handler.
*
* The IPI call may time out without all targets entering the NMI handler.
* In that case, there is some logic to recover (and ignore subsequent
@@ -355,7 +371,7 @@
static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
static struct cpumask nmi_ipi_pending_mask;
-static int nmi_ipi_busy_count = 0;
+static bool nmi_ipi_busy = false;
static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
static void nmi_ipi_lock_start(unsigned long *flags)
@@ -394,7 +410,7 @@
*/
int smp_handle_nmi_ipi(struct pt_regs *regs)
{
- void (*fn)(struct pt_regs *);
+ void (*fn)(struct pt_regs *) = NULL;
unsigned long flags;
int me = raw_smp_processor_id();
int ret = 0;
@@ -405,29 +421,17 @@
* because the caller may have timed out.
*/
nmi_ipi_lock_start(&flags);
- if (!nmi_ipi_busy_count)
- goto out;
- if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask))
- goto out;
-
- fn = nmi_ipi_function;
- if (!fn)
- goto out;
-
- cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
- nmi_ipi_busy_count++;
- nmi_ipi_unlock();
-
- ret = 1;
-
- fn(regs);
-
- nmi_ipi_lock();
- if (nmi_ipi_busy_count > 1) /* Can race with caller time-out */
- nmi_ipi_busy_count--;
-out:
+ if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) {
+ cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+ fn = READ_ONCE(nmi_ipi_function);
+ WARN_ON_ONCE(!fn);
+ ret = 1;
+ }
nmi_ipi_unlock_end(&flags);
+ if (fn)
+ fn(regs);
+
return ret;
}
@@ -453,9 +457,10 @@
* - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
* - fn is the target callback function.
* - delay_us > 0 is the delay before giving up waiting for targets to
- * complete executing the handler, == 0 specifies indefinite delay.
+ * begin executing the handler, == 0 specifies indefinite delay.
*/
-int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe)
+static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
+ u64 delay_us, bool safe)
{
unsigned long flags;
int me = raw_smp_processor_id();
@@ -467,31 +472,33 @@
if (unlikely(!smp_ops))
return 0;
- /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */
nmi_ipi_lock_start(&flags);
- while (nmi_ipi_busy_count) {
+ while (nmi_ipi_busy) {
nmi_ipi_unlock_end(&flags);
- spin_until_cond(nmi_ipi_busy_count == 0);
+ spin_until_cond(!nmi_ipi_busy);
nmi_ipi_lock_start(&flags);
}
-
+ nmi_ipi_busy = true;
nmi_ipi_function = fn;
+ WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask));
+
if (cpu < 0) {
/* ALL_OTHERS */
cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
} else {
- /* cpumask starts clear */
cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
}
- nmi_ipi_busy_count++;
+
nmi_ipi_unlock();
+ /* Interrupts remain hard disabled */
+
do_smp_send_nmi_ipi(cpu, safe);
nmi_ipi_lock();
- /* nmi_ipi_busy_count is held here, so unlock/lock is okay */
+ /* nmi_ipi_busy is set here, so unlock/lock is okay */
while (!cpumask_empty(&nmi_ipi_pending_mask)) {
nmi_ipi_unlock();
udelay(1);
@@ -503,29 +510,15 @@
}
}
- while (nmi_ipi_busy_count > 1) {
- nmi_ipi_unlock();
- udelay(1);
- nmi_ipi_lock();
- if (delay_us) {
- delay_us--;
- if (!delay_us)
- break;
- }
- }
-
if (!cpumask_empty(&nmi_ipi_pending_mask)) {
/* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
ret = 0;
cpumask_clear(&nmi_ipi_pending_mask);
}
- if (nmi_ipi_busy_count > 1) {
- /* Timeout waiting for CPUs to execute fn */
- ret = 0;
- nmi_ipi_busy_count = 1;
- }
- nmi_ipi_busy_count--;
+ nmi_ipi_function = NULL;
+ nmi_ipi_busy = false;
+
nmi_ipi_unlock_end(&flags);
return ret;
@@ -593,17 +586,8 @@
static void nmi_stop_this_cpu(struct pt_regs *regs)
{
/*
- * This is a special case because it never returns, so the NMI IPI
- * handling would never mark it as done, which makes any later
- * smp_send_nmi_ipi() call spin forever. Mark it done now.
- *
* IRQs are already hard disabled by the smp_handle_nmi_ipi.
*/
- nmi_ipi_lock();
- if (nmi_ipi_busy_count > 1)
- nmi_ipi_busy_count--;
- nmi_ipi_unlock();
-
spin_begin();
while (1)
spin_cpu_relax();
@@ -643,7 +627,7 @@
}
#endif /* CONFIG_NMI_IPI */
-struct thread_info *current_set[NR_CPUS];
+struct task_struct *current_set[NR_CPUS];
static void smp_store_cpu_info(int id)
{
@@ -674,6 +658,185 @@
}
#endif
+/*
+ * parse_thread_groups: Parses the "ibm,thread-groups" device tree
+ * property for the CPU device node @dn and stores
+ * the parsed output in the thread_groups
+ * structure @tg if the ibm,thread-groups[0]
+ * matches @property.
+ *
+ * @dn: The device node of the CPU device.
+ * @tg: Pointer to a thread group structure into which the parsed
+ * output of "ibm,thread-groups" is stored.
+ * @property: The property of the thread-group that the caller is
+ * interested in.
+ *
+ * ibm,thread-groups[0..N-1] array defines which group of threads in
+ * the CPU-device node can be grouped together based on the property.
+ *
+ * ibm,thread-groups[0] tells us the property based on which the
+ * threads are being grouped together. If this value is 1, it implies
+ * that the threads in the same group share L1, translation cache.
+ *
+ * ibm,thread-groups[1] tells us how many such thread groups exist.
+ *
+ * ibm,thread-groups[2] tells us the number of threads in each such
+ * group.
+ *
+ * ibm,thread-groups[3..N-1] is the list of threads identified by
+ * "ibm,ppc-interrupt-server#s" arranged as per their membership in
+ * the grouping.
+ *
+ * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it
+ * implies that there are 2 groups of 4 threads each, where each group
+ * of threads share L1, translation cache.
+ *
+ * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
+ * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10,
+ * 11, 12} structure
+ *
+ * Returns 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ */
+static int parse_thread_groups(struct device_node *dn,
+ struct thread_groups *tg,
+ unsigned int property)
+{
+ int i;
+ u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
+ u32 *thread_list;
+ size_t total_threads;
+ int ret;
+
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+ thread_group_array, 3);
+ if (ret)
+ return ret;
+
+ tg->property = thread_group_array[0];
+ tg->nr_groups = thread_group_array[1];
+ tg->threads_per_group = thread_group_array[2];
+ if (tg->property != property ||
+ tg->nr_groups < 1 ||
+ tg->threads_per_group < 1)
+ return -ENODATA;
+
+ total_threads = tg->nr_groups * tg->threads_per_group;
+
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+ thread_group_array,
+ 3 + total_threads);
+ if (ret)
+ return ret;
+
+ thread_list = &thread_group_array[3];
+
+ for (i = 0 ; i < total_threads; i++)
+ tg->thread_list[i] = thread_list[i];
+
+ return 0;
+}
+
+/*
+ * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
+ * that @cpu belongs to.
+ *
+ * @cpu : The logical CPU whose thread group is being searched.
+ * @tg : The thread-group structure of the CPU node which @cpu belongs
+ * to.
+ *
+ * Returns the index to tg->thread_list that points to the the start
+ * of the thread_group that @cpu belongs to.
+ *
+ * Returns -1 if cpu doesn't belong to any of the groups pointed to by
+ * tg->thread_list.
+ */
+static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
+{
+ int hw_cpu_id = get_hard_smp_processor_id(cpu);
+ int i, j;
+
+ for (i = 0; i < tg->nr_groups; i++) {
+ int group_start = i * tg->threads_per_group;
+
+ for (j = 0; j < tg->threads_per_group; j++) {
+ int idx = group_start + j;
+
+ if (tg->thread_list[idx] == hw_cpu_id)
+ return group_start;
+ }
+ }
+
+ return -1;
+}
+
+static int init_cpu_l1_cache_map(int cpu)
+
+{
+ struct device_node *dn = of_get_cpu_node(cpu, NULL);
+ struct thread_groups tg = {.property = 0,
+ .nr_groups = 0,
+ .threads_per_group = 0};
+ int first_thread = cpu_first_thread_sibling(cpu);
+ int i, cpu_group_start = -1, err = 0;
+
+ if (!dn)
+ return -ENODATA;
+
+ err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
+ if (err)
+ goto out;
+
+ zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+
+ cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
+
+ if (unlikely(cpu_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ err = -ENODATA;
+ goto out;
+ }
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++) {
+ int i_group_start = get_cpu_thread_group_start(i, &tg);
+
+ if (unlikely(i_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ err = -ENODATA;
+ goto out;
+ }
+
+ if (i_group_start == cpu_group_start)
+ cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
+ }
+
+out:
+ of_node_put(dn);
+ return err;
+}
+
+static int init_big_cores(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ int err = init_cpu_l1_cache_map(cpu);
+
+ if (err)
+ return err;
+
+ zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+ }
+
+ has_big_cores = true;
+ return 0;
+}
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
unsigned int cpu;
@@ -712,6 +875,12 @@
cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
+ init_big_cores();
+ if (has_big_cores) {
+ cpumask_set_cpu(boot_cpuid,
+ cpu_smallcore_mask(boot_cpuid));
+ }
+
if (smp_ops && smp_ops->probe)
smp_ops->probe();
}
@@ -723,7 +892,7 @@
paca_ptrs[boot_cpuid]->__current = current;
#endif
set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
- current_set[boot_cpuid] = task_thread_info(current);
+ current_set[boot_cpuid] = current;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -808,14 +977,13 @@
static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
{
- struct thread_info *ti = task_thread_info(idle);
-
#ifdef CONFIG_PPC64
paca_ptrs[cpu]->__current = idle;
- paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
+ THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
- ti->cpu = cpu;
- secondary_ti = current_set[cpu] = ti;
+ idle->cpu = cpu;
+ secondary_current = current_set[cpu] = idle;
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
@@ -995,10 +1163,28 @@
set_cpus_unrelated(cpu, i, cpu_core_mask);
set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+ if (has_big_cores)
+ set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
}
}
#endif
+static inline void add_cpu_to_smallcore_masks(int cpu)
+{
+ struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu);
+ int i, first_thread = cpu_first_thread_sibling(cpu);
+
+ if (!has_big_cores)
+ return;
+
+ cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++) {
+ if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map))
+ set_cpus_related(i, cpu, cpu_smallcore_mask);
+ }
+}
+
static void add_cpu_to_masks(int cpu)
{
int first_thread = cpu_first_thread_sibling(cpu);
@@ -1015,6 +1201,7 @@
if (cpu_online(i))
set_cpus_related(i, cpu, cpu_sibling_mask);
+ add_cpu_to_smallcore_masks(cpu);
/*
* Copy the thread sibling mask into the cache sibling mask
* and mark any CPUs that share an L2 with this CPU.
@@ -1044,6 +1231,7 @@
void start_secondary(void *unused)
{
unsigned int cpu = smp_processor_id();
+ struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
mmgrab(&init_mm);
current->active_mm = &init_mm;
@@ -1069,11 +1257,13 @@
/* Update topology CPU masks */
add_cpu_to_masks(cpu);
+ if (has_big_cores)
+ sibling_mask = cpu_smallcore_mask;
/*
* Check for any shared caches. Note that this must be done on a
* per-core basis because one core in the pair might be disabled.
*/
- if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
+ if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
shared_caches = true;
set_numa_node(numa_cpu_lookup_table[cpu]);
@@ -1083,6 +1273,8 @@
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
+ boot_init_stack_canary();
+
local_irq_enable();
/* We can enable ftrace for secondary cpus now */
@@ -1140,6 +1332,13 @@
return cpu_l2_cache_mask(cpu);
}
+#ifdef CONFIG_SCHED_SMT
+static const struct cpumask *smallcore_smt_mask(int cpu)
+{
+ return cpu_smallcore_mask(cpu);
+}
+#endif
+
static struct sched_domain_topology_level power9_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
@@ -1167,6 +1366,13 @@
shared_proc_topology_init();
dump_numa_cpu_topology();
+#ifdef CONFIG_SCHED_SMT
+ if (has_big_cores) {
+ pr_info("Using small cores at SMT level\n");
+ power9_topology[0].mask = smallcore_smt_mask;
+ powerpc_topology[0].mask = smallcore_smt_mask;
+ }
+#endif
/*
* If any CPU detects that it's sharing a cache with another CPU then
* use the deeper topology that is aware of this sharing.
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index e2c50b5..e2a46cf 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -67,12 +67,17 @@
{
unsigned long sp;
+ if (!try_get_task_stack(tsk))
+ return;
+
if (tsk == current)
sp = current_stack_pointer();
else
sp = tsk->thread.ksp;
save_context_stack(trace, sp, tsk, 0);
+
+ put_task_stack(tsk);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
@@ -84,25 +89,21 @@
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
-int
-save_stack_trace_tsk_reliable(struct task_struct *tsk,
- struct stack_trace *trace)
+/*
+ * This function returns an error if it detects any unreliable features of the
+ * stack. Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
+ */
+static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
+ struct stack_trace *trace)
{
unsigned long sp;
+ unsigned long newsp;
unsigned long stack_page = (unsigned long)task_stack_page(tsk);
unsigned long stack_end;
int graph_idx = 0;
-
- /*
- * The last frame (unwinding first) may not yet have saved
- * its LR onto the stack.
- */
- int firstframe = 1;
-
- if (tsk == current)
- sp = current_stack_pointer();
- else
- sp = tsk->thread.ksp;
+ bool firstframe;
stack_end = stack_page + THREAD_SIZE;
if (!is_idle_task(tsk)) {
@@ -129,71 +130,96 @@
stack_end -= STACK_FRAME_OVERHEAD;
}
+ if (tsk == current)
+ sp = current_stack_pointer();
+ else
+ sp = tsk->thread.ksp;
+
if (sp < stack_page + sizeof(struct thread_struct) ||
sp > stack_end - STACK_FRAME_MIN_SIZE) {
- return 1;
+ return -EINVAL;
}
- for (;;) {
+ for (firstframe = true; sp != stack_end;
+ firstframe = false, sp = newsp) {
unsigned long *stack = (unsigned long *) sp;
- unsigned long newsp, ip;
+ unsigned long ip;
/* sanity check: ABI requires SP to be aligned 16 bytes. */
if (sp & 0xF)
- return 1;
-
- /* Mark stacktraces with exception frames as unreliable. */
- if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
- stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
- return 1;
- }
+ return -EINVAL;
newsp = stack[0];
/* Stack grows downwards; unwinder may only go up. */
if (newsp <= sp)
- return 1;
+ return -EINVAL;
if (newsp != stack_end &&
newsp > stack_end - STACK_FRAME_MIN_SIZE) {
- return 1; /* invalid backlink, too far up. */
+ return -EINVAL; /* invalid backlink, too far up. */
+ }
+
+ /*
+ * We can only trust the bottom frame's backlink, the
+ * rest of the frame may be uninitialized, continue to
+ * the next.
+ */
+ if (firstframe)
+ continue;
+
+ /* Mark stacktraces with exception frames as unreliable. */
+ if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
+ stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ return -EINVAL;
}
/* Examine the saved LR: it must point into kernel code. */
ip = stack[STACK_FRAME_LR_SAVE];
- if (!firstframe && !__kernel_text_address(ip))
- return 1;
- firstframe = 0;
+ if (!__kernel_text_address(ip))
+ return -EINVAL;
/*
* FIXME: IMHO these tests do not belong in
* arch-dependent code, they are generic.
*/
- ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL);
+ ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
#ifdef CONFIG_KPROBES
/*
* Mark stacktraces with kretprobed functions on them
* as unreliable.
*/
if (ip == (unsigned long)kretprobe_trampoline)
- return 1;
+ return -EINVAL;
#endif
+ if (trace->nr_entries >= trace->max_entries)
+ return -E2BIG;
if (!trace->skip)
trace->entries[trace->nr_entries++] = ip;
else
trace->skip--;
-
- if (newsp == stack_end)
- break;
-
- if (trace->nr_entries >= trace->max_entries)
- return -E2BIG;
-
- sp = newsp;
}
return 0;
}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable);
+
+int save_stack_trace_tsk_reliable(struct task_struct *tsk,
+ struct stack_trace *trace)
+{
+ int ret;
+
+ /*
+ * If the task doesn't have a stack (e.g., a zombie), the stack is
+ * "reliably" empty.
+ */
+ if (!try_get_task_stack(tsk))
+ return 0;
+
+ ret = __save_stack_trace_tsk_reliable(tsk, trace);
+
+ put_task_stack(tsk);
+
+ return ret;
+}
#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c
index a531154..b84992c 100644
--- a/arch/powerpc/kernel/suspend.c
+++ b/arch/powerpc/kernel/suspend.c
@@ -1,13 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Suspend support specific for power.
*
- * Distribute under GPLv2
- *
* Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>
* Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
*/
#include <linux/mm.h>
+#include <linux/suspend.h>
#include <asm/page.h>
#include <asm/sections.h>
diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
index 0050b2d..41dcb21 100644
--- a/arch/powerpc/kernel/swsusp.c
+++ b/arch/powerpc/kernel/swsusp.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Common powerpc suspend code for 32 and 64 bits
*
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/sched.h>
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index 7a919e9..cbdf862 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -25,11 +25,19 @@
#define SL_IBAT2 0x48
#define SL_DBAT3 0x50
#define SL_IBAT3 0x58
-#define SL_TB 0x60
-#define SL_R2 0x68
-#define SL_CR 0x6c
-#define SL_LR 0x70
-#define SL_R12 0x74 /* r12 to r31 */
+#define SL_DBAT4 0x60
+#define SL_IBAT4 0x68
+#define SL_DBAT5 0x70
+#define SL_IBAT5 0x78
+#define SL_DBAT6 0x80
+#define SL_IBAT6 0x88
+#define SL_DBAT7 0x90
+#define SL_IBAT7 0x98
+#define SL_TB 0xa0
+#define SL_R2 0xa8
+#define SL_CR 0xac
+#define SL_LR 0xb0
+#define SL_R12 0xb4 /* r12 to r31 */
#define SL_SIZE (SL_R12 + 80)
.section .data
@@ -114,6 +122,41 @@
mfibatl r4,3
stw r4,SL_IBAT3+4(r11)
+BEGIN_MMU_FTR_SECTION
+ mfspr r4,SPRN_DBAT4U
+ stw r4,SL_DBAT4(r11)
+ mfspr r4,SPRN_DBAT4L
+ stw r4,SL_DBAT4+4(r11)
+ mfspr r4,SPRN_DBAT5U
+ stw r4,SL_DBAT5(r11)
+ mfspr r4,SPRN_DBAT5L
+ stw r4,SL_DBAT5+4(r11)
+ mfspr r4,SPRN_DBAT6U
+ stw r4,SL_DBAT6(r11)
+ mfspr r4,SPRN_DBAT6L
+ stw r4,SL_DBAT6+4(r11)
+ mfspr r4,SPRN_DBAT7U
+ stw r4,SL_DBAT7(r11)
+ mfspr r4,SPRN_DBAT7L
+ stw r4,SL_DBAT7+4(r11)
+ mfspr r4,SPRN_IBAT4U
+ stw r4,SL_IBAT4(r11)
+ mfspr r4,SPRN_IBAT4L
+ stw r4,SL_IBAT4+4(r11)
+ mfspr r4,SPRN_IBAT5U
+ stw r4,SL_IBAT5(r11)
+ mfspr r4,SPRN_IBAT5L
+ stw r4,SL_IBAT5+4(r11)
+ mfspr r4,SPRN_IBAT6U
+ stw r4,SL_IBAT6(r11)
+ mfspr r4,SPRN_IBAT6L
+ stw r4,SL_IBAT6+4(r11)
+ mfspr r4,SPRN_IBAT7U
+ stw r4,SL_IBAT7(r11)
+ mfspr r4,SPRN_IBAT7L
+ stw r4,SL_IBAT7+4(r11)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
#if 0
/* Backup various CPU config stuffs */
bl __save_cpu_setup
@@ -279,27 +322,41 @@
mtibatu 3,r4
lwz r4,SL_IBAT3+4(r11)
mtibatl 3,r4
-#endif
-
BEGIN_MMU_FTR_SECTION
- li r4,0
+ lwz r4,SL_DBAT4(r11)
mtspr SPRN_DBAT4U,r4
+ lwz r4,SL_DBAT4+4(r11)
mtspr SPRN_DBAT4L,r4
+ lwz r4,SL_DBAT5(r11)
mtspr SPRN_DBAT5U,r4
+ lwz r4,SL_DBAT5+4(r11)
mtspr SPRN_DBAT5L,r4
+ lwz r4,SL_DBAT6(r11)
mtspr SPRN_DBAT6U,r4
+ lwz r4,SL_DBAT6+4(r11)
mtspr SPRN_DBAT6L,r4
+ lwz r4,SL_DBAT7(r11)
mtspr SPRN_DBAT7U,r4
+ lwz r4,SL_DBAT7+4(r11)
mtspr SPRN_DBAT7L,r4
+ lwz r4,SL_IBAT4(r11)
mtspr SPRN_IBAT4U,r4
+ lwz r4,SL_IBAT4+4(r11)
mtspr SPRN_IBAT4L,r4
+ lwz r4,SL_IBAT5(r11)
mtspr SPRN_IBAT5U,r4
+ lwz r4,SL_IBAT5+4(r11)
mtspr SPRN_IBAT5L,r4
+ lwz r4,SL_IBAT6(r11)
mtspr SPRN_IBAT6U,r4
+ lwz r4,SL_IBAT6+4(r11)
mtspr SPRN_IBAT6L,r4
+ lwz r4,SL_IBAT7(r11)
mtspr SPRN_IBAT7U,r4
+ lwz r4,SL_IBAT7+4(r11)
mtspr SPRN_IBAT7L,r4
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+#endif
/* Flush all TLBs */
lis r4,0x1000
diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c
index 51db012..aeea97a 100644
--- a/arch/powerpc/kernel/swsusp_64.c
+++ b/arch/powerpc/kernel/swsusp_64.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PowerPC 64-bit swsusp implementation
*
* Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
- *
- * GPLv2
*/
#include <asm/iommu.h>
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index f83bf6f..6d31898 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -1,9 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* PowerPC 64-bit swsusp implementation
*
* Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
- *
- * GPLv2
*/
#include <linux/threads.h>
@@ -262,7 +261,7 @@
addi r1,r1,-128
#ifdef CONFIG_PPC_BOOK3S_64
- bl slb_flush_and_rebolt
+ bl slb_flush_and_restore_bolted
#endif
bl do_after_copyback
addi r1,r1,128
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index bdf58ba..d36c639 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* sys_ppc32.c: Conversion between 32bit and 64bit native syscalls.
*
@@ -7,11 +8,6 @@
*
* These routines maintain argument size conversion between 32bit and 64bit
* environment.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 4662165..3bfb388 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Implementation of various system calls for Linux/PowerPC
*
@@ -11,12 +12,6 @@
* This file contains various random system calls that
* have a non-standard calling sequence on the Linux/PPC
* platform.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/errno.h>
@@ -89,7 +84,7 @@
if ( (unsigned long)n >= 4096 )
{
unsigned long __user *buffer = (unsigned long __user *)n;
- if (!access_ok(VERIFY_READ, buffer, 5*sizeof(unsigned long))
+ if (!access_ok(buffer, 5*sizeof(unsigned long))
|| __get_user(n, buffer)
|| __get_user(inp, ((fd_set __user * __user *)(buffer+1)))
|| __get_user(outp, ((fd_set __user * __user *)(buffer+2)))
@@ -123,7 +118,7 @@
(u64)len_high << 32 | len_low, advice);
}
-long sys_switch_endian(void)
+SYSCALL_DEFINE0(switch_endian)
{
struct thread_info *ti;
diff --git a/arch/powerpc/kernel/syscalls/Makefile b/arch/powerpc/kernel/syscalls/Makefile
new file mode 100644
index 0000000..27b4895
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls/Makefile
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+kapi := arch/$(SRCARCH)/include/generated/asm
+uapi := arch/$(SRCARCH)/include/generated/uapi/asm
+
+_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \
+ $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+
+syscall := $(srctree)/$(src)/syscall.tbl
+syshdr := $(srctree)/$(src)/syscallhdr.sh
+systbl := $(srctree)/$(src)/syscalltbl.sh
+
+quiet_cmd_syshdr = SYSHDR $@
+ cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
+ '$(syshdr_abis_$(basetarget))' \
+ '$(syshdr_pfx_$(basetarget))' \
+ '$(syshdr_offset_$(basetarget))'
+
+quiet_cmd_systbl = SYSTBL $@
+ cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \
+ '$(systbl_abis_$(basetarget))' \
+ '$(systbl_abi_$(basetarget))' \
+ '$(systbl_offset_$(basetarget))'
+
+syshdr_abis_unistd_32 := common,nospu,32
+$(uapi)/unistd_32.h: $(syscall) $(syshdr)
+ $(call if_changed,syshdr)
+
+syshdr_abis_unistd_64 := common,nospu,64
+$(uapi)/unistd_64.h: $(syscall) $(syshdr)
+ $(call if_changed,syshdr)
+
+systbl_abis_syscall_table_32 := common,nospu,32
+systbl_abi_syscall_table_32 := 32
+$(kapi)/syscall_table_32.h: $(syscall) $(systbl)
+ $(call if_changed,systbl)
+
+systbl_abis_syscall_table_64 := common,nospu,64
+systbl_abi_syscall_table_64 := 64
+$(kapi)/syscall_table_64.h: $(syscall) $(systbl)
+ $(call if_changed,systbl)
+
+systbl_abis_syscall_table_c32 := common,nospu,32
+systbl_abi_syscall_table_c32 := c32
+$(kapi)/syscall_table_c32.h: $(syscall) $(systbl)
+ $(call if_changed,systbl)
+
+systbl_abis_syscall_table_spu := common,spu
+systbl_abi_syscall_table_spu := spu
+$(kapi)/syscall_table_spu.h: $(syscall) $(systbl)
+ $(call if_changed,systbl)
+
+uapisyshdr-y += unistd_32.h unistd_64.h
+kapisyshdr-y += syscall_table_32.h \
+ syscall_table_64.h \
+ syscall_table_c32.h \
+ syscall_table_spu.h
+
+targets += $(uapisyshdr-y) $(kapisyshdr-y)
+
+PHONY += all
+all: $(addprefix $(uapi)/,$(uapisyshdr-y))
+all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+ @:
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
new file mode 100644
index 0000000..43f736e
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -0,0 +1,519 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# system call numbers and entry vectors for powerpc
+#
+# The format is:
+# <number> <abi> <name> <entry point> <compat entry point>
+#
+# The <abi> can be common, spu, nospu, 64, or 32 for this file.
+#
+0 nospu restart_syscall sys_restart_syscall
+1 nospu exit sys_exit
+2 nospu fork ppc_fork
+3 common read sys_read
+4 common write sys_write
+5 common open sys_open compat_sys_open
+6 common close sys_close
+7 common waitpid sys_waitpid
+8 common creat sys_creat
+9 common link sys_link
+10 common unlink sys_unlink
+11 nospu execve sys_execve compat_sys_execve
+12 common chdir sys_chdir
+13 32 time sys_time32
+13 64 time sys_time
+13 spu time sys_time
+14 common mknod sys_mknod
+15 common chmod sys_chmod
+16 common lchown sys_lchown
+17 common break sys_ni_syscall
+18 32 oldstat sys_stat sys_ni_syscall
+18 64 oldstat sys_ni_syscall
+18 spu oldstat sys_ni_syscall
+19 common lseek sys_lseek compat_sys_lseek
+20 common getpid sys_getpid
+21 nospu mount sys_mount compat_sys_mount
+22 32 umount sys_oldumount
+22 64 umount sys_ni_syscall
+22 spu umount sys_ni_syscall
+23 common setuid sys_setuid
+24 common getuid sys_getuid
+25 32 stime sys_stime32
+25 64 stime sys_stime
+25 spu stime sys_stime
+26 nospu ptrace sys_ptrace compat_sys_ptrace
+27 common alarm sys_alarm
+28 32 oldfstat sys_fstat sys_ni_syscall
+28 64 oldfstat sys_ni_syscall
+28 spu oldfstat sys_ni_syscall
+29 nospu pause sys_pause
+30 32 utime sys_utime32
+30 64 utime sys_utime
+31 common stty sys_ni_syscall
+32 common gtty sys_ni_syscall
+33 common access sys_access
+34 common nice sys_nice
+35 common ftime sys_ni_syscall
+36 common sync sys_sync
+37 common kill sys_kill
+38 common rename sys_rename
+39 common mkdir sys_mkdir
+40 common rmdir sys_rmdir
+41 common dup sys_dup
+42 common pipe sys_pipe
+43 common times sys_times compat_sys_times
+44 common prof sys_ni_syscall
+45 common brk sys_brk
+46 common setgid sys_setgid
+47 common getgid sys_getgid
+48 nospu signal sys_signal
+49 common geteuid sys_geteuid
+50 common getegid sys_getegid
+51 nospu acct sys_acct
+52 nospu umount2 sys_umount
+53 common lock sys_ni_syscall
+54 common ioctl sys_ioctl compat_sys_ioctl
+55 common fcntl sys_fcntl compat_sys_fcntl
+56 common mpx sys_ni_syscall
+57 common setpgid sys_setpgid
+58 common ulimit sys_ni_syscall
+59 32 oldolduname sys_olduname
+59 64 oldolduname sys_ni_syscall
+59 spu oldolduname sys_ni_syscall
+60 common umask sys_umask
+61 common chroot sys_chroot
+62 nospu ustat sys_ustat compat_sys_ustat
+63 common dup2 sys_dup2
+64 common getppid sys_getppid
+65 common getpgrp sys_getpgrp
+66 common setsid sys_setsid
+67 32 sigaction sys_sigaction compat_sys_sigaction
+67 64 sigaction sys_ni_syscall
+67 spu sigaction sys_ni_syscall
+68 common sgetmask sys_sgetmask
+69 common ssetmask sys_ssetmask
+70 common setreuid sys_setreuid
+71 common setregid sys_setregid
+72 32 sigsuspend sys_sigsuspend
+72 64 sigsuspend sys_ni_syscall
+72 spu sigsuspend sys_ni_syscall
+73 32 sigpending sys_sigpending compat_sys_sigpending
+73 64 sigpending sys_ni_syscall
+73 spu sigpending sys_ni_syscall
+74 common sethostname sys_sethostname
+75 common setrlimit sys_setrlimit compat_sys_setrlimit
+76 32 getrlimit sys_old_getrlimit compat_sys_old_getrlimit
+76 64 getrlimit sys_ni_syscall
+76 spu getrlimit sys_ni_syscall
+77 common getrusage sys_getrusage compat_sys_getrusage
+78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday
+79 common settimeofday sys_settimeofday compat_sys_settimeofday
+80 common getgroups sys_getgroups
+81 common setgroups sys_setgroups
+82 32 select ppc_select sys_ni_syscall
+82 64 select sys_ni_syscall
+82 spu select sys_ni_syscall
+83 common symlink sys_symlink
+84 32 oldlstat sys_lstat sys_ni_syscall
+84 64 oldlstat sys_ni_syscall
+84 spu oldlstat sys_ni_syscall
+85 common readlink sys_readlink
+86 nospu uselib sys_uselib
+87 nospu swapon sys_swapon
+88 nospu reboot sys_reboot
+89 32 readdir sys_old_readdir compat_sys_old_readdir
+89 64 readdir sys_ni_syscall
+89 spu readdir sys_ni_syscall
+90 common mmap sys_mmap
+91 common munmap sys_munmap
+92 common truncate sys_truncate compat_sys_truncate
+93 common ftruncate sys_ftruncate compat_sys_ftruncate
+94 common fchmod sys_fchmod
+95 common fchown sys_fchown
+96 common getpriority sys_getpriority
+97 common setpriority sys_setpriority
+98 common profil sys_ni_syscall
+99 nospu statfs sys_statfs compat_sys_statfs
+100 nospu fstatfs sys_fstatfs compat_sys_fstatfs
+101 common ioperm sys_ni_syscall
+102 common socketcall sys_socketcall compat_sys_socketcall
+103 common syslog sys_syslog
+104 common setitimer sys_setitimer compat_sys_setitimer
+105 common getitimer sys_getitimer compat_sys_getitimer
+106 common stat sys_newstat compat_sys_newstat
+107 common lstat sys_newlstat compat_sys_newlstat
+108 common fstat sys_newfstat compat_sys_newfstat
+109 32 olduname sys_uname
+109 64 olduname sys_ni_syscall
+109 spu olduname sys_ni_syscall
+110 common iopl sys_ni_syscall
+111 common vhangup sys_vhangup
+112 common idle sys_ni_syscall
+113 common vm86 sys_ni_syscall
+114 common wait4 sys_wait4 compat_sys_wait4
+115 nospu swapoff sys_swapoff
+116 common sysinfo sys_sysinfo compat_sys_sysinfo
+117 nospu ipc sys_ipc compat_sys_ipc
+118 common fsync sys_fsync
+119 32 sigreturn sys_sigreturn compat_sys_sigreturn
+119 64 sigreturn sys_ni_syscall
+119 spu sigreturn sys_ni_syscall
+120 nospu clone ppc_clone
+121 common setdomainname sys_setdomainname
+122 common uname sys_newuname
+123 common modify_ldt sys_ni_syscall
+124 32 adjtimex sys_adjtimex_time32
+124 64 adjtimex sys_adjtimex
+124 spu adjtimex sys_adjtimex
+125 common mprotect sys_mprotect
+126 32 sigprocmask sys_sigprocmask compat_sys_sigprocmask
+126 64 sigprocmask sys_ni_syscall
+126 spu sigprocmask sys_ni_syscall
+127 common create_module sys_ni_syscall
+128 nospu init_module sys_init_module
+129 nospu delete_module sys_delete_module
+130 common get_kernel_syms sys_ni_syscall
+131 nospu quotactl sys_quotactl
+132 common getpgid sys_getpgid
+133 common fchdir sys_fchdir
+134 common bdflush sys_bdflush
+135 common sysfs sys_sysfs
+136 32 personality sys_personality ppc64_personality
+136 64 personality ppc64_personality
+136 spu personality ppc64_personality
+137 common afs_syscall sys_ni_syscall
+138 common setfsuid sys_setfsuid
+139 common setfsgid sys_setfsgid
+140 common _llseek sys_llseek
+141 common getdents sys_getdents compat_sys_getdents
+142 common _newselect sys_select compat_sys_select
+143 common flock sys_flock
+144 common msync sys_msync
+145 common readv sys_readv compat_sys_readv
+146 common writev sys_writev compat_sys_writev
+147 common getsid sys_getsid
+148 common fdatasync sys_fdatasync
+149 nospu _sysctl sys_sysctl compat_sys_sysctl
+150 common mlock sys_mlock
+151 common munlock sys_munlock
+152 common mlockall sys_mlockall
+153 common munlockall sys_munlockall
+154 common sched_setparam sys_sched_setparam
+155 common sched_getparam sys_sched_getparam
+156 common sched_setscheduler sys_sched_setscheduler
+157 common sched_getscheduler sys_sched_getscheduler
+158 common sched_yield sys_sched_yield
+159 common sched_get_priority_max sys_sched_get_priority_max
+160 common sched_get_priority_min sys_sched_get_priority_min
+161 32 sched_rr_get_interval sys_sched_rr_get_interval_time32
+161 64 sched_rr_get_interval sys_sched_rr_get_interval
+161 spu sched_rr_get_interval sys_sched_rr_get_interval
+162 32 nanosleep sys_nanosleep_time32
+162 64 nanosleep sys_nanosleep
+162 spu nanosleep sys_nanosleep
+163 common mremap sys_mremap
+164 common setresuid sys_setresuid
+165 common getresuid sys_getresuid
+166 common query_module sys_ni_syscall
+167 common poll sys_poll
+168 common nfsservctl sys_ni_syscall
+169 common setresgid sys_setresgid
+170 common getresgid sys_getresgid
+171 common prctl sys_prctl
+172 nospu rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn
+173 nospu rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
+174 nospu rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
+175 nospu rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
+176 32 rt_sigtimedwait sys_rt_sigtimedwait_time32 compat_sys_rt_sigtimedwait_time32
+176 64 rt_sigtimedwait sys_rt_sigtimedwait
+177 nospu rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
+179 common pread64 sys_pread64 compat_sys_pread64
+180 common pwrite64 sys_pwrite64 compat_sys_pwrite64
+181 common chown sys_chown
+182 common getcwd sys_getcwd
+183 common capget sys_capget
+184 common capset sys_capset
+185 nospu sigaltstack sys_sigaltstack compat_sys_sigaltstack
+186 32 sendfile sys_sendfile compat_sys_sendfile
+186 64 sendfile sys_sendfile64
+186 spu sendfile sys_sendfile64
+187 common getpmsg sys_ni_syscall
+188 common putpmsg sys_ni_syscall
+189 nospu vfork ppc_vfork
+190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
+191 common readahead sys_readahead compat_sys_readahead
+192 32 mmap2 sys_mmap2 compat_sys_mmap2
+193 32 truncate64 sys_truncate64 compat_sys_truncate64
+194 32 ftruncate64 sys_ftruncate64 compat_sys_ftruncate64
+195 32 stat64 sys_stat64
+196 32 lstat64 sys_lstat64
+197 32 fstat64 sys_fstat64
+198 nospu pciconfig_read sys_pciconfig_read
+199 nospu pciconfig_write sys_pciconfig_write
+200 nospu pciconfig_iobase sys_pciconfig_iobase
+201 common multiplexer sys_ni_syscall
+202 common getdents64 sys_getdents64
+203 common pivot_root sys_pivot_root
+204 32 fcntl64 sys_fcntl64 compat_sys_fcntl64
+205 common madvise sys_madvise
+206 common mincore sys_mincore
+207 common gettid sys_gettid
+208 common tkill sys_tkill
+209 common setxattr sys_setxattr
+210 common lsetxattr sys_lsetxattr
+211 common fsetxattr sys_fsetxattr
+212 common getxattr sys_getxattr
+213 common lgetxattr sys_lgetxattr
+214 common fgetxattr sys_fgetxattr
+215 common listxattr sys_listxattr
+216 common llistxattr sys_llistxattr
+217 common flistxattr sys_flistxattr
+218 common removexattr sys_removexattr
+219 common lremovexattr sys_lremovexattr
+220 common fremovexattr sys_fremovexattr
+221 32 futex sys_futex_time32
+221 64 futex sys_futex
+221 spu futex sys_futex
+222 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
+223 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
+# 224 unused
+225 common tuxcall sys_ni_syscall
+226 32 sendfile64 sys_sendfile64 compat_sys_sendfile64
+227 common io_setup sys_io_setup compat_sys_io_setup
+228 common io_destroy sys_io_destroy
+229 32 io_getevents sys_io_getevents_time32
+229 64 io_getevents sys_io_getevents
+229 spu io_getevents sys_io_getevents
+230 common io_submit sys_io_submit compat_sys_io_submit
+231 common io_cancel sys_io_cancel
+232 nospu set_tid_address sys_set_tid_address
+233 common fadvise64 sys_fadvise64 ppc32_fadvise64
+234 nospu exit_group sys_exit_group
+235 nospu lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
+236 common epoll_create sys_epoll_create
+237 common epoll_ctl sys_epoll_ctl
+238 common epoll_wait sys_epoll_wait
+239 common remap_file_pages sys_remap_file_pages
+240 common timer_create sys_timer_create compat_sys_timer_create
+241 32 timer_settime sys_timer_settime32
+241 64 timer_settime sys_timer_settime
+241 spu timer_settime sys_timer_settime
+242 32 timer_gettime sys_timer_gettime32
+242 64 timer_gettime sys_timer_gettime
+242 spu timer_gettime sys_timer_gettime
+243 common timer_getoverrun sys_timer_getoverrun
+244 common timer_delete sys_timer_delete
+245 32 clock_settime sys_clock_settime32
+245 64 clock_settime sys_clock_settime
+245 spu clock_settime sys_clock_settime
+246 32 clock_gettime sys_clock_gettime32
+246 64 clock_gettime sys_clock_gettime
+246 spu clock_gettime sys_clock_gettime
+247 32 clock_getres sys_clock_getres_time32
+247 64 clock_getres sys_clock_getres
+247 spu clock_getres sys_clock_getres
+248 32 clock_nanosleep sys_clock_nanosleep_time32
+248 64 clock_nanosleep sys_clock_nanosleep
+248 spu clock_nanosleep sys_clock_nanosleep
+249 32 swapcontext ppc_swapcontext ppc32_swapcontext
+249 64 swapcontext ppc64_swapcontext
+249 spu swapcontext sys_ni_syscall
+250 common tgkill sys_tgkill
+251 32 utimes sys_utimes_time32
+251 64 utimes sys_utimes
+251 spu utimes sys_utimes
+252 common statfs64 sys_statfs64 compat_sys_statfs64
+253 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
+254 32 fadvise64_64 ppc_fadvise64_64
+254 spu fadvise64_64 sys_ni_syscall
+255 common rtas sys_rtas
+256 32 sys_debug_setcontext sys_debug_setcontext sys_ni_syscall
+256 64 sys_debug_setcontext sys_ni_syscall
+256 spu sys_debug_setcontext sys_ni_syscall
+# 257 reserved for vserver
+258 nospu migrate_pages sys_migrate_pages compat_sys_migrate_pages
+259 nospu mbind sys_mbind compat_sys_mbind
+260 nospu get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
+261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
+262 nospu mq_open sys_mq_open compat_sys_mq_open
+263 nospu mq_unlink sys_mq_unlink
+264 32 mq_timedsend sys_mq_timedsend_time32
+264 64 mq_timedsend sys_mq_timedsend
+265 32 mq_timedreceive sys_mq_timedreceive_time32
+265 64 mq_timedreceive sys_mq_timedreceive
+266 nospu mq_notify sys_mq_notify compat_sys_mq_notify
+267 nospu mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
+268 nospu kexec_load sys_kexec_load compat_sys_kexec_load
+269 nospu add_key sys_add_key
+270 nospu request_key sys_request_key
+271 nospu keyctl sys_keyctl compat_sys_keyctl
+272 nospu waitid sys_waitid compat_sys_waitid
+273 nospu ioprio_set sys_ioprio_set
+274 nospu ioprio_get sys_ioprio_get
+275 nospu inotify_init sys_inotify_init
+276 nospu inotify_add_watch sys_inotify_add_watch
+277 nospu inotify_rm_watch sys_inotify_rm_watch
+278 nospu spu_run sys_spu_run
+279 nospu spu_create sys_spu_create
+280 32 pselect6 sys_pselect6_time32 compat_sys_pselect6_time32
+280 64 pselect6 sys_pselect6
+281 32 ppoll sys_ppoll_time32 compat_sys_ppoll_time32
+281 64 ppoll sys_ppoll
+282 common unshare sys_unshare
+283 common splice sys_splice
+284 common tee sys_tee
+285 common vmsplice sys_vmsplice compat_sys_vmsplice
+286 common openat sys_openat compat_sys_openat
+287 common mkdirat sys_mkdirat
+288 common mknodat sys_mknodat
+289 common fchownat sys_fchownat
+290 32 futimesat sys_futimesat_time32
+290 64 futimesat sys_futimesat
+290 spu utimesat sys_futimesat
+291 32 fstatat64 sys_fstatat64
+291 64 newfstatat sys_newfstatat
+291 spu newfstatat sys_newfstatat
+292 common unlinkat sys_unlinkat
+293 common renameat sys_renameat
+294 common linkat sys_linkat
+295 common symlinkat sys_symlinkat
+296 common readlinkat sys_readlinkat
+297 common fchmodat sys_fchmodat
+298 common faccessat sys_faccessat
+299 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
+300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
+301 common move_pages sys_move_pages compat_sys_move_pages
+302 common getcpu sys_getcpu
+303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
+304 32 utimensat sys_utimensat_time32
+304 64 utimensat sys_utimensat
+304 spu utimensat sys_utimensat
+305 common signalfd sys_signalfd compat_sys_signalfd
+306 common timerfd_create sys_timerfd_create
+307 common eventfd sys_eventfd
+308 common sync_file_range2 sys_sync_file_range2 compat_sys_sync_file_range2
+309 nospu fallocate sys_fallocate compat_sys_fallocate
+310 nospu subpage_prot sys_subpage_prot
+311 32 timerfd_settime sys_timerfd_settime32
+311 64 timerfd_settime sys_timerfd_settime
+311 spu timerfd_settime sys_timerfd_settime
+312 32 timerfd_gettime sys_timerfd_gettime32
+312 64 timerfd_gettime sys_timerfd_gettime
+312 spu timerfd_gettime sys_timerfd_gettime
+313 common signalfd4 sys_signalfd4 compat_sys_signalfd4
+314 common eventfd2 sys_eventfd2
+315 common epoll_create1 sys_epoll_create1
+316 common dup3 sys_dup3
+317 common pipe2 sys_pipe2
+318 nospu inotify_init1 sys_inotify_init1
+319 common perf_event_open sys_perf_event_open
+320 common preadv sys_preadv compat_sys_preadv
+321 common pwritev sys_pwritev compat_sys_pwritev
+322 nospu rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+323 nospu fanotify_init sys_fanotify_init
+324 nospu fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
+325 common prlimit64 sys_prlimit64
+326 common socket sys_socket
+327 common bind sys_bind
+328 common connect sys_connect
+329 common listen sys_listen
+330 common accept sys_accept
+331 common getsockname sys_getsockname
+332 common getpeername sys_getpeername
+333 common socketpair sys_socketpair
+334 common send sys_send
+335 common sendto sys_sendto
+336 common recv sys_recv compat_sys_recv
+337 common recvfrom sys_recvfrom compat_sys_recvfrom
+338 common shutdown sys_shutdown
+339 common setsockopt sys_setsockopt compat_sys_setsockopt
+340 common getsockopt sys_getsockopt compat_sys_getsockopt
+341 common sendmsg sys_sendmsg compat_sys_sendmsg
+342 common recvmsg sys_recvmsg compat_sys_recvmsg
+343 32 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32
+343 64 recvmmsg sys_recvmmsg
+343 spu recvmmsg sys_recvmmsg
+344 common accept4 sys_accept4
+345 common name_to_handle_at sys_name_to_handle_at
+346 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
+347 32 clock_adjtime sys_clock_adjtime32
+347 64 clock_adjtime sys_clock_adjtime
+347 spu clock_adjtime sys_clock_adjtime
+348 common syncfs sys_syncfs
+349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
+350 common setns sys_setns
+351 nospu process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
+352 nospu process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+353 nospu finit_module sys_finit_module
+354 nospu kcmp sys_kcmp
+355 common sched_setattr sys_sched_setattr
+356 common sched_getattr sys_sched_getattr
+357 common renameat2 sys_renameat2
+358 common seccomp sys_seccomp
+359 common getrandom sys_getrandom
+360 common memfd_create sys_memfd_create
+361 common bpf sys_bpf
+362 nospu execveat sys_execveat compat_sys_execveat
+363 32 switch_endian sys_ni_syscall
+363 64 switch_endian ppc_switch_endian
+363 spu switch_endian sys_ni_syscall
+364 common userfaultfd sys_userfaultfd
+365 common membarrier sys_membarrier
+# 366-377 originally left for IPC, now unused
+378 nospu mlock2 sys_mlock2
+379 nospu copy_file_range sys_copy_file_range
+380 common preadv2 sys_preadv2 compat_sys_preadv2
+381 common pwritev2 sys_pwritev2 compat_sys_pwritev2
+382 nospu kexec_file_load sys_kexec_file_load
+383 nospu statx sys_statx
+384 nospu pkey_alloc sys_pkey_alloc
+385 nospu pkey_free sys_pkey_free
+386 nospu pkey_mprotect sys_pkey_mprotect
+387 nospu rseq sys_rseq
+388 32 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents
+388 64 io_pgetevents sys_io_pgetevents
+# room for arch specific syscalls
+392 64 semtimedop sys_semtimedop
+393 common semget sys_semget
+394 common semctl sys_semctl compat_sys_semctl
+395 common shmget sys_shmget
+396 common shmctl sys_shmctl compat_sys_shmctl
+397 common shmat sys_shmat compat_sys_shmat
+398 common shmdt sys_shmdt
+399 common msgget sys_msgget
+400 common msgsnd sys_msgsnd compat_sys_msgsnd
+401 common msgrcv sys_msgrcv compat_sys_msgrcv
+402 common msgctl sys_msgctl compat_sys_msgctl
+403 32 clock_gettime64 sys_clock_gettime sys_clock_gettime
+404 32 clock_settime64 sys_clock_settime sys_clock_settime
+405 32 clock_adjtime64 sys_clock_adjtime sys_clock_adjtime
+406 32 clock_getres_time64 sys_clock_getres sys_clock_getres
+407 32 clock_nanosleep_time64 sys_clock_nanosleep sys_clock_nanosleep
+408 32 timer_gettime64 sys_timer_gettime sys_timer_gettime
+409 32 timer_settime64 sys_timer_settime sys_timer_settime
+410 32 timerfd_gettime64 sys_timerfd_gettime sys_timerfd_gettime
+411 32 timerfd_settime64 sys_timerfd_settime sys_timerfd_settime
+412 32 utimensat_time64 sys_utimensat sys_utimensat
+413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
+414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
+416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
+418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
+419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
+420 32 semtimedop_time64 sys_semtimedop sys_semtimedop
+421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
+422 32 futex_time64 sys_futex sys_futex
+423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
+435 nospu clone3 ppc_clone3
diff --git a/arch/powerpc/kernel/syscalls/syscallhdr.sh b/arch/powerpc/kernel/syscalls/syscallhdr.sh
new file mode 100644
index 0000000..c0a9a32
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls/syscallhdr.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+in="$1"
+out="$2"
+my_abis=`echo "($3)" | tr ',' '|'`
+prefix="$4"
+offset="$5"
+
+fileguard=_UAPI_ASM_POWERPC_`basename "$out" | sed \
+ -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
+ -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
+grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
+ printf "#ifndef %s\n" "${fileguard}"
+ printf "#define %s\n" "${fileguard}"
+ printf "\n"
+
+ nxt=0
+ while read nr abi name entry compat ; do
+ if [ -z "$offset" ]; then
+ printf "#define __NR_%s%s\t%s\n" \
+ "${prefix}" "${name}" "${nr}"
+ else
+ printf "#define __NR_%s%s\t(%s + %s)\n" \
+ "${prefix}" "${name}" "${offset}" "${nr}"
+ fi
+ nxt=$((nr+1))
+ done
+
+ printf "\n"
+ printf "#ifdef __KERNEL__\n"
+ printf "#define __NR_syscalls\t%s\n" "${nxt}"
+ printf "#endif\n"
+ printf "\n"
+ printf "#endif /* %s */" "${fileguard}"
+ printf "\n"
+) > "$out"
diff --git a/arch/powerpc/kernel/syscalls/syscalltbl.sh b/arch/powerpc/kernel/syscalls/syscalltbl.sh
new file mode 100644
index 0000000..f7393a7
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls/syscalltbl.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+in="$1"
+out="$2"
+my_abis=`echo "($3)" | tr ',' '|'`
+my_abi="$4"
+offset="$5"
+
+emit() {
+ t_nxt="$1"
+ t_nr="$2"
+ t_entry="$3"
+
+ while [ $t_nxt -lt $t_nr ]; do
+ printf "__SYSCALL(%s,sys_ni_syscall)\n" "${t_nxt}"
+ t_nxt=$((t_nxt+1))
+ done
+ printf "__SYSCALL(%s,%s)\n" "${t_nxt}" "${t_entry}"
+}
+
+grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
+ nxt=0
+ if [ -z "$offset" ]; then
+ offset=0
+ fi
+
+ while read nr abi name entry compat ; do
+ if [ "$my_abi" = "c32" ] && [ ! -z "$compat" ]; then
+ emit $((nxt+offset)) $((nr+offset)) $compat
+ else
+ emit $((nxt+offset)) $((nr+offset)) $entry
+ fi
+ nxt=$((nr+1))
+ done
+) > "$out"
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 755dc98..80a676d 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/smp.h>
@@ -18,6 +19,7 @@
#include <asm/smp.h>
#include <asm/pmc.h>
#include <asm/firmware.h>
+#include <asm/svm.h>
#include "cacheinfo.h"
#include "setup.h"
@@ -457,7 +459,7 @@
#define HAS_PPC_PMC_CLASSIC 1
#define HAS_PPC_PMC_IBM 1
#define HAS_PPC_PMC_PA6T 1
-#elif defined(CONFIG_6xx)
+#elif defined(CONFIG_PPC_BOOK3S_32)
#define HAS_PPC_PMC_CLASSIC 1
#define HAS_PPC_PMC_IBM 1
#define HAS_PPC_PMC_G4 1
@@ -610,7 +612,7 @@
SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
-#ifdef CONFIG_DEBUG_KERNEL
+#ifdef CONFIG_DEBUG_MISC
SYSFS_SPRSETUP(hid0, SPRN_HID0);
SYSFS_SPRSETUP(hid1, SPRN_HID1);
SYSFS_SPRSETUP(hid4, SPRN_HID4);
@@ -639,7 +641,7 @@
SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1);
SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2);
SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3);
-#endif /* CONFIG_DEBUG_KERNEL */
+#endif /* CONFIG_DEBUG_MISC */
#endif /* HAS_PPC_PMC_PA6T */
#ifdef HAS_PPC_PMC_IBM
@@ -680,7 +682,7 @@
__ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3),
__ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4),
__ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5),
-#ifdef CONFIG_DEBUG_KERNEL
+#ifdef CONFIG_DEBUG_MISC
__ATTR(hid0, 0600, show_hid0, store_hid0),
__ATTR(hid1, 0600, show_hid1, store_hid1),
__ATTR(hid4, 0600, show_hid4, store_hid4),
@@ -709,11 +711,28 @@
__ATTR(tsr1, 0600, show_tsr1, store_tsr1),
__ATTR(tsr2, 0600, show_tsr2, store_tsr2),
__ATTR(tsr3, 0600, show_tsr3, store_tsr3),
-#endif /* CONFIG_DEBUG_KERNEL */
+#endif /* CONFIG_DEBUG_MISC */
};
#endif /* HAS_PPC_PMC_PA6T */
#endif /* HAS_PPC_PMC_CLASSIC */
+#ifdef CONFIG_PPC_SVM
+static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", is_secure_guest());
+}
+static DEVICE_ATTR(svm, 0444, show_svm, NULL);
+
+static void create_svm_file(void)
+{
+ device_create_file(cpu_subsys.dev_root, &dev_attr_svm);
+}
+#else
+static void create_svm_file(void)
+{
+}
+#endif /* CONFIG_PPC_SVM */
+
static int register_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -1057,6 +1076,8 @@
sysfs_create_dscr_default();
#endif /* CONFIG_PPC64 */
+ create_svm_file();
+
return 0;
}
subsys_initcall(topology_init);
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 919a327..5b905a2 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains the table of syscall-handling functions.
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -7,37 +8,10 @@
*
* Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
* PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/ppc_asm.h>
-#ifdef CONFIG_PPC64
-#define SYSCALL(func) .8byte DOTSYM(sys_##func),DOTSYM(sys_##func)
-#define COMPAT_SYS(func) .8byte DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
-#define PPC_SYS(func) .8byte DOTSYM(ppc_##func),DOTSYM(ppc_##func)
-#define OLDSYS(func) .8byte DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
-#define SYS32ONLY(func) .8byte DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
-#define PPC64ONLY(func) .8byte DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
-#define SYSX(f, f3264, f32) .8byte DOTSYM(f),DOTSYM(f3264)
-#else
-#define SYSCALL(func) .long sys_##func
-#define COMPAT_SYS(func) .long sys_##func
-#define PPC_SYS(func) .long ppc_##func
-#define OLDSYS(func) .long sys_##func
-#define SYS32ONLY(func) .long sys_##func
-#define PPC64ONLY(func) .long sys_ni_syscall
-#define SYSX(f, f3264, f32) .long f32
-#endif
-#define SYSCALL_SPU(func) SYSCALL(func)
-#define COMPAT_SYS_SPU(func) COMPAT_SYS(func)
-#define COMPAT_SPU_NEW(func) COMPAT_SYS(func)
-#define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32)
-
.section .rodata,"a"
#ifdef CONFIG_PPC64
@@ -46,5 +20,21 @@
.globl sys_call_table
sys_call_table:
+#ifdef CONFIG_PPC64
+#define __SYSCALL(nr, entry) .8byte DOTSYM(entry)
+#include <asm/syscall_table_64.h>
+#undef __SYSCALL
+#else
+#define __SYSCALL(nr, entry) .long entry
+#include <asm/syscall_table_32.h>
+#undef __SYSCALL
+#endif
-#include <asm/systbl.h>
+#ifdef CONFIG_COMPAT
+.globl compat_sys_call_table
+compat_sys_call_table:
+#define compat_sys_sigsuspend sys_sigsuspend
+#define __SYSCALL(nr, entry) .8byte DOTSYM(entry)
+#include <asm/syscall_table_c32.h>
+#undef __SYSCALL
+#endif
diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c
deleted file mode 100644
index 4653258..0000000
--- a/arch/powerpc/kernel/systbl_chk.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * This file, when run through CPP produces a list of syscall numbers
- * in the order of systbl.h. That way we can check for gaps and syscalls
- * that are out of order.
- *
- * Unfortunately, we cannot check for the correct ordering of entries
- * using SYSX().
- *
- * Copyright © IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/unistd.h>
-
-#define SYSCALL(func) __NR_##func
-#define COMPAT_SYS(func) __NR_##func
-#define PPC_SYS(func) __NR_##func
-#ifdef CONFIG_PPC64
-#define OLDSYS(func) -1
-#define SYS32ONLY(func) -1
-#define PPC64ONLY(func) __NR_##func
-#else
-#define OLDSYS(func) __NR_old##func
-#define SYS32ONLY(func) __NR_##func
-#define PPC64ONLY(func) -1
-#endif
-#define SYSX(f, f3264, f32) -1
-
-#define SYSCALL_SPU(func) SYSCALL(func)
-#define COMPAT_SYS_SPU(func) COMPAT_SYS(func)
-#define COMPAT_SPU_NEW(func) COMPAT_SYS(_new##func)
-#define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32)
-
-/* Just insert a marker for ni_syscalls */
-#define __NR_ni_syscall -1
-
-/*
- * These are the known exceptions.
- * Hopefully, there will be no more.
- */
-#define __NR_llseek __NR__llseek
-#undef __NR_umount
-#define __NR_umount __NR_umount2
-#define __NR_old_getrlimit __NR_getrlimit
-#define __NR_newstat __NR_stat
-#define __NR_newlstat __NR_lstat
-#define __NR_newfstat __NR_fstat
-#define __NR_newuname __NR_uname
-#define __NR_sysctl __NR__sysctl
-#define __NR_olddebug_setcontext __NR_sys_debug_setcontext
-
-/* We call sys_ugetrlimit for syscall number __NR_getrlimit */
-#define getrlimit ugetrlimit
-
-START_TABLE
-#include <asm/systbl.h>
-END_TABLE NR_syscalls
diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh
index f2e356c..c7ac3ed 100644
--- a/arch/powerpc/kernel/systbl_chk.sh
+++ b/arch/powerpc/kernel/systbl_chk.sh
@@ -1,14 +1,11 @@
#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
#
# Just process the CPP output from systbl_chk.c and complain
# if anything is out of order.
#
# Copyright © 2008 IBM Corporation
#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version
-# 2 of the License, or (at your option) any later version.
awk 'BEGIN { num = -1; } # Ignore the beginning of the file
/^#/ { next; }
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 70f145e..6945223 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Common time routines among all ppc machines.
*
@@ -24,11 +25,6 @@
*
* 1997-09-10 Updated NTP code according to technical memorandum Jan '96
* "A Kernel Model for Precision Timekeeping" by Dave Mills
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
@@ -43,7 +39,6 @@
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
-#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
@@ -57,7 +52,6 @@
#include <linux/irq_work.h>
#include <linux/clk-provider.h>
#include <linux/suspend.h>
-#include <linux/rtc.h>
#include <linux/sched/cputime.h>
#include <linux/processor.h>
#include <asm/trace.h>
@@ -111,6 +105,7 @@
.rating = 200,
.irq = 0,
.set_next_event = decrementer_set_next_event,
+ .set_state_oneshot_stopped = decrementer_shutdown,
.set_state_shutdown = decrementer_shutdown,
.tick_resume = decrementer_shutdown,
.features = CLOCK_EVT_FEAT_ONESHOT |
@@ -151,6 +146,8 @@
unsigned long ppc_tb_freq;
EXPORT_SYMBOL_GPL(ppc_tb_freq);
+bool tb_invalid;
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Factor for converting from cputime_t (timebase ticks) to
@@ -175,7 +172,7 @@
* Read the SPURR on systems that have it, otherwise the PURR,
* or if that doesn't exist return the timebase value passed in.
*/
-static unsigned long read_spurr(unsigned long tb)
+static inline unsigned long read_spurr(unsigned long tb)
{
if (cpu_has_feature(CPU_FTR_SPURR))
return mfspr(SPRN_SPURR);
@@ -281,26 +278,17 @@
* Account time for a transition between system, hard irq
* or soft irq state.
*/
-static unsigned long vtime_delta(struct task_struct *tsk,
- unsigned long *stime_scaled,
- unsigned long *steal_time)
+static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
+ unsigned long now, unsigned long stime)
{
- unsigned long now, nowscaled, deltascaled;
- unsigned long stime;
+ unsigned long stime_scaled = 0;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ unsigned long nowscaled, deltascaled;
unsigned long utime, utime_scaled;
- struct cpu_accounting_data *acct = get_accounting(tsk);
- WARN_ON_ONCE(!irqs_disabled());
-
- now = mftb();
nowscaled = read_spurr(now);
- stime = now - acct->starttime;
- acct->starttime = now;
deltascaled = nowscaled - acct->startspurr;
acct->startspurr = nowscaled;
-
- *steal_time = calculate_stolen_time(now);
-
utime = acct->utime - acct->utime_sspurr;
acct->utime_sspurr = acct->utime;
@@ -314,17 +302,38 @@
* the user ticks get saved up in paca->user_time_scaled to be
* used by account_process_tick.
*/
- *stime_scaled = stime;
+ stime_scaled = stime;
utime_scaled = utime;
if (deltascaled != stime + utime) {
if (utime) {
- *stime_scaled = deltascaled * stime / (stime + utime);
- utime_scaled = deltascaled - *stime_scaled;
+ stime_scaled = deltascaled * stime / (stime + utime);
+ utime_scaled = deltascaled - stime_scaled;
} else {
- *stime_scaled = deltascaled;
+ stime_scaled = deltascaled;
}
}
acct->utime_scaled += utime_scaled;
+#endif
+
+ return stime_scaled;
+}
+
+static unsigned long vtime_delta(struct task_struct *tsk,
+ unsigned long *stime_scaled,
+ unsigned long *steal_time)
+{
+ unsigned long now, stime;
+ struct cpu_accounting_data *acct = get_accounting(tsk);
+
+ WARN_ON_ONCE(!irqs_disabled());
+
+ now = mftb();
+ stime = now - acct->starttime;
+ acct->starttime = now;
+
+ *stime_scaled = vtime_delta_scaled(acct, now, stime);
+
+ *steal_time = calculate_stolen_time(now);
return stime;
}
@@ -341,7 +350,9 @@
if ((tsk->flags & PF_VCPU) && !irq_count()) {
acct->gtime += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
acct->utime_scaled += stime_scaled;
+#endif
} else {
if (hardirq_count())
acct->hardirq_time += stime;
@@ -350,7 +361,9 @@
else
acct->stime += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
acct->stime_scaled += stime_scaled;
+#endif
}
}
EXPORT_SYMBOL_GPL(vtime_account_system);
@@ -364,6 +377,21 @@
acct->idle_time += stime + steal_time;
}
+static void vtime_flush_scaled(struct task_struct *tsk,
+ struct cpu_accounting_data *acct)
+{
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ if (acct->utime_scaled)
+ tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
+ if (acct->stime_scaled)
+ tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);
+
+ acct->utime_scaled = 0;
+ acct->utime_sspurr = 0;
+ acct->stime_scaled = 0;
+#endif
+}
+
/*
* Account the whole cputime accumulated in the paca
* Must be called with interrupts disabled.
@@ -378,14 +406,13 @@
if (acct->utime)
account_user_time(tsk, cputime_to_nsecs(acct->utime));
- if (acct->utime_scaled)
- tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
-
if (acct->gtime)
account_guest_time(tsk, cputime_to_nsecs(acct->gtime));
- if (acct->steal_time)
+ if (IS_ENABLED(CONFIG_PPC_SPLPAR) && acct->steal_time) {
account_steal_time(cputime_to_nsecs(acct->steal_time));
+ acct->steal_time = 0;
+ }
if (acct->idle_time)
account_idle_time(cputime_to_nsecs(acct->idle_time));
@@ -393,8 +420,6 @@
if (acct->stime)
account_system_index_time(tsk, cputime_to_nsecs(acct->stime),
CPUTIME_SYSTEM);
- if (acct->stime_scaled)
- tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);
if (acct->hardirq_time)
account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time),
@@ -403,14 +428,12 @@
account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time),
CPUTIME_SOFTIRQ);
+ vtime_flush_scaled(tsk, acct);
+
acct->utime = 0;
- acct->utime_scaled = 0;
- acct->utime_sspurr = 0;
acct->gtime = 0;
- acct->steal_time = 0;
acct->idle_time = 0;
acct->stime = 0;
- acct->stime_scaled = 0;
acct->hardirq_time = 0;
acct->softirq_time = 0;
}
@@ -434,6 +457,13 @@
diff += 1000000000;
spin_cpu_relax();
} while (diff < loops);
+ } else if (tb_invalid) {
+ /*
+ * TB is in error state and isn't ticking anymore.
+ * HMI handler was unable to recover from TB error.
+ * Return immediately, so that kernel won't get stuck here.
+ */
+ spin_cpu_relax();
} else {
start = get_tbl();
while (get_tbl() - start < loops)
@@ -984,10 +1014,14 @@
*dec = decrementer_clockevent;
dec->cpumask = cpumask_of(cpu);
+ clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max);
+
printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
dec->name, dec->mult, dec->shift, cpu);
- clockevents_register_device(dec);
+ /* Set values for KVM, see kvm_emulate_dec() */
+ decrementer_clockevent.mult = dec->mult;
+ decrementer_clockevent.shift = dec->shift;
}
static void enable_large_decrementer(void)
@@ -1035,18 +1069,7 @@
static void __init init_decrementer_clockevent(void)
{
- int cpu = smp_processor_id();
-
- clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
-
- decrementer_clockevent.max_delta_ns =
- clockevent_delta2ns(decrementer_max, &decrementer_clockevent);
- decrementer_clockevent.max_delta_ticks = decrementer_max;
- decrementer_clockevent.min_delta_ns =
- clockevent_delta2ns(2, &decrementer_clockevent);
- decrementer_clockevent.min_delta_ticks = 2;
-
- register_decrementer_clockevent(cpu);
+ register_decrementer_clockevent(smp_processor_id());
}
void secondary_cpu_time_init(void)
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 7716374..6ba0fdd 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -92,13 +92,14 @@
blr
EXPORT_SYMBOL_GPL(tm_abort);
-/* void tm_reclaim(struct thread_struct *thread,
+/*
+ * void tm_reclaim(struct thread_struct *thread,
* uint8_t cause)
*
* - Performs a full reclaim. This destroys outstanding
- * transactions and updates thread->regs.tm_ckpt_* with the
- * original checkpointed state. Note that thread->regs is
- * unchanged.
+ * transactions and updates thread.ckpt_regs, thread.ckfp_state and
+ * thread.ckvr_state with the original checkpointed state. Note that
+ * thread->regs is unchanged.
*
* Purpose is to both abort transactions of, and preserve the state of,
* a transactions at a context switch. We preserve/restore both sets of process
@@ -147,7 +148,7 @@
/* Stash the stack pointer away for use after reclaim */
std r1, PACAR1(r13)
- /* Clear MSR RI since we are about to change r1, EE is already off. */
+ /* Clear MSR RI since we are about to use SCRATCH0, EE is already off */
li r5, 0
mtmsrd r5, 1
@@ -163,15 +164,16 @@
*/
TRECLAIM(R4) /* Cause in r4 */
- /* ******************** GPRs ******************** */
- /* Stash the checkpointed r13 away in the scratch SPR and get the real
- * paca
+ /*
+ * ******************** GPRs ********************
+ * Stash the checkpointed r13 in the scratch SPR and get the real paca.
*/
SET_SCRATCH0(r13)
GET_PACA(r13)
- /* Stash the checkpointed r1 away in paca tm_scratch and get the real
- * stack pointer back
+ /*
+ * Stash the checkpointed r1 away in paca->tm_scratch and get the real
+ * stack pointer back into r1.
*/
std r1, PACATMSCRATCH(r13)
ld r1, PACAR1(r13)
@@ -209,14 +211,15 @@
addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */
- /* Make r7 look like an exception frame so that we
- * can use the neat GPRx(n) macros. r7 is NOT a pt_regs ptr!
+ /*
+ * Make r7 look like an exception frame so that we can use the neat
+ * GPRx(n) macros. r7 is NOT a pt_regs ptr!
*/
subi r7, r7, STACK_FRAME_OVERHEAD
/* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
SAVE_GPR(0, r7) /* user r0 */
- SAVE_GPR(2, r7) /* user r2 */
+ SAVE_GPR(2, r7) /* user r2 */
SAVE_4GPRS(3, r7) /* user r3-r6 */
SAVE_GPR(8, r7) /* user r8 */
SAVE_GPR(9, r7) /* user r9 */
@@ -237,7 +240,8 @@
/* ******************** NIP ******************** */
mfspr r3, SPRN_TFHAR
std r3, _NIP(r7) /* Returns to failhandler */
- /* The checkpointed NIP is ignored when rescheduling/rechkpting,
+ /*
+ * The checkpointed NIP is ignored when rescheduling/rechkpting,
* but is used in signal return to 'wind back' to the abort handler.
*/
@@ -260,12 +264,13 @@
std r3, THREAD_TM_TAR(r12)
std r4, THREAD_TM_DSCR(r12)
- /* MSR and flags: We don't change CRs, and we don't need to alter
- * MSR.
+ /*
+ * MSR and flags: We don't change CRs, and we don't need to alter MSR.
*/
- /* ******************** FPR/VR/VSRs ************
+ /*
+ * ******************** FPR/VR/VSRs ************
* After reclaiming, capture the checkpointed FPRs/VRs.
*
* We enabled VEC/FP/VSX in the msr above, so we can execute these
@@ -275,7 +280,7 @@
/* Altivec (VEC/VMX/VR)*/
addi r7, r3, THREAD_CKVRSTATE
- SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */
+ SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 ckvr_state */
mfvscr v0
li r6, VRSTATE_VSCR
stvx v0, r7, r6
@@ -286,12 +291,13 @@
/* Floating Point (FP) */
addi r7, r3, THREAD_CKFPSTATE
- SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */
+ SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 ckfp_state */
mffs fr0
stfd fr0,FPSTATE_FPSCR(r7)
- /* TM regs, incl TEXASR -- these live in thread_struct. Note they've
+ /*
+ * TM regs, incl TEXASR -- these live in thread_struct. Note they've
* been updated by the treclaim, to explain to userland the failure
* cause (aborted).
*/
@@ -327,7 +333,7 @@
blr
- /*
+ /*
* void __tm_recheckpoint(struct thread_struct *thread)
* - Restore the checkpointed register state saved by tm_reclaim
* when we switch_to a process.
@@ -343,7 +349,8 @@
std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
- /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
+ /*
+ * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
* This is used for backing up the NVGPRs:
*/
SAVE_NVGPRS(r1)
@@ -352,8 +359,9 @@
addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */
- /* Make r7 look like an exception frame so that we
- * can use the neat GPRx(n) macros. r7 is now NOT a pt_regs ptr!
+ /*
+ * Make r7 look like an exception frame so that we can use the neat
+ * GPRx(n) macros. r7 is now NOT a pt_regs ptr!
*/
subi r7, r7, STACK_FRAME_OVERHEAD
@@ -421,14 +429,15 @@
REST_NVGPRS(r7) /* GPR14-31 */
- /* Load up PPR and DSCR here so we don't run with user values for long
- */
+ /* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
mtspr SPRN_PPR, r6
- /* Do final sanity check on TEXASR to make sure FS is set. Do this
+ /*
+ * Do final sanity check on TEXASR to make sure FS is set. Do this
* here before we load up the userspace r1 so any bugs we hit will get
- * a call chain */
+ * a call chain.
+ */
mfspr r5, SPRN_TEXASR
srdi r5, r5, 16
li r6, (TEXASR_FS)@h
@@ -436,8 +445,9 @@
1: tdeqi r6, 0
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
- /* Do final sanity check on MSR to make sure we are not transactional
- * or suspended
+ /*
+ * Do final sanity check on MSR to make sure we are not transactional
+ * or suspended.
*/
mfmsr r6
li r5, (MSR_TS_MASK)@higher
@@ -453,8 +463,8 @@
REST_GPR(6, r7)
/*
- * Store r1 and r5 on the stack so that we can access them
- * after we clear MSR RI.
+ * Store r1 and r5 on the stack so that we can access them after we
+ * clear MSR RI.
*/
REST_GPR(5, r7)
@@ -464,7 +474,7 @@
REST_GPR(7, r7)
- /* Clear MSR RI since we are about to change r1. EE is already off */
+ /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */
li r5, 0
mtmsrd r5, 1
@@ -484,7 +494,8 @@
HMT_MEDIUM
- /* Our transactional state has now changed.
+ /*
+ * Our transactional state has now changed.
*
* Now just get out of here. Transactional (current) state will be
* updated once restore is called on the return path in the _switch-ed
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
index d22d8ba..8585037 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -3,11 +3,9 @@
# Makefile for the powerpc trace subsystem
#
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
ifdef CONFIG_FUNCTION_TRACER
# do not trace tracer code
-CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
endif
obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o
@@ -25,6 +23,7 @@
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
-# Disable GCOV & sanitizers in odd or sensitive code
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_ftrace.o := n
+KCOV_INSTRUMENT_ftrace.o := n
UBSAN_SANITIZE_ftrace.o := n
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 19ef4f5..7ea0ca0 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -30,6 +30,16 @@
#ifdef CONFIG_DYNAMIC_FTRACE
+
+/*
+ * We generally only have a single long_branch tramp and at most 2 or 3 plt
+ * tramps generated. But, we don't use the plt tramps currently. We also allot
+ * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
+ * tramps in total. Set aside 8 just to be sure.
+ */
+#define NUM_FTRACE_TRAMPS 8
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
static unsigned int
ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
{
@@ -85,16 +95,19 @@
return create_branch((unsigned int *)ip, addr, 0);
}
-#ifdef CONFIG_MODULES
-
static int is_bl_op(unsigned int op)
{
return (op & 0xfc000003) == 0x48000001;
}
+static int is_b_op(unsigned int op)
+{
+ return (op & 0xfc000003) == 0x48000000;
+}
+
static unsigned long find_bl_target(unsigned long ip, unsigned int op)
{
- static int offset;
+ int offset;
offset = (op & 0x03fffffc);
/* make it signed */
@@ -104,6 +117,7 @@
return ip + (long)offset;
}
+#ifdef CONFIG_MODULES
#ifdef CONFIG_PPC64
static int
__ftrace_make_nop(struct module *mod,
@@ -270,6 +284,146 @@
#endif /* PPC64 */
#endif /* CONFIG_MODULES */
+static unsigned long find_ftrace_tramp(unsigned long ip)
+{
+ int i;
+
+ /*
+ * We have the compiler generated long_branch tramps at the end
+ * and we prefer those
+ */
+ for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--)
+ if (!ftrace_tramps[i])
+ continue;
+ else if (create_branch((void *)ip, ftrace_tramps[i], 0))
+ return ftrace_tramps[i];
+
+ return 0;
+}
+
+static int add_ftrace_tramp(unsigned long tramp)
+{
+ int i;
+
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_tramps[i]) {
+ ftrace_tramps[i] = tramp;
+ return 0;
+ }
+
+ return -1;
+}
+
+/*
+ * If this is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), we re-write the branch to
+ * instead go to ftrace_[regs_]caller() and note down the location of this
+ * trampoline.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+ int i, op;
+ unsigned long ptr;
+ static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
+
+ /* Is this a known long jump tramp? */
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_tramps[i])
+ break;
+ else if (ftrace_tramps[i] == tramp)
+ return 0;
+
+ /* Is this a known plt tramp? */
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_plt_tramps[i])
+ break;
+ else if (ftrace_plt_tramps[i] == tramp)
+ return -1;
+
+ /* New trampoline -- read where this goes */
+ if (probe_kernel_read(&op, (void *)tramp, sizeof(int))) {
+ pr_debug("Fetching opcode failed.\n");
+ return -1;
+ }
+
+ /* Is this a 24 bit branch? */
+ if (!is_b_op(op)) {
+ pr_debug("Trampoline is not a long branch tramp.\n");
+ return -1;
+ }
+
+ /* lets find where the pointer goes */
+ ptr = find_bl_target(tramp, op);
+
+ if (ptr != ppc_global_function_entry((void *)_mcount)) {
+ pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
+ return -1;
+ }
+
+ /* Let's re-write the tramp to go to ftrace_[regs_]caller */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
+#else
+ ptr = ppc_global_function_entry((void *)ftrace_caller);
+#endif
+ if (!create_branch((void *)tramp, ptr, 0)) {
+ pr_debug("%ps is not reachable from existing mcount tramp\n",
+ (void *)ptr);
+ return -1;
+ }
+
+ if (patch_branch((unsigned int *)tramp, ptr, 0)) {
+ pr_debug("REL24 out of range!\n");
+ return -1;
+ }
+
+ if (add_ftrace_tramp(tramp)) {
+ pr_debug("No tramp locations left\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long tramp, ip = rec->ip;
+ unsigned int op;
+
+ /* Read where this goes */
+ if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
+ pr_err("Fetching opcode failed.\n");
+ return -EFAULT;
+ }
+
+ /* Make sure that that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ pr_err("Not expected bl: opcode is %x\n", op);
+ return -EINVAL;
+ }
+
+ /* Let's find where the pointer goes */
+ tramp = find_bl_target(ip, op);
+
+ pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+ if (setup_mcount_compiler_tramp(tramp)) {
+ /* Are other trampolines reachable? */
+ if (!find_ftrace_tramp(ip)) {
+ pr_err("No ftrace trampolines reachable from %ps\n",
+ (void *)ip);
+ return -EINVAL;
+ }
+ }
+
+ if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) {
+ pr_err("Patching NOP failed.\n");
+ return -EPERM;
+ }
+
+ return 0;
+}
+
int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
@@ -286,7 +440,8 @@
old = ftrace_call_replace(ip, addr, 1);
new = PPC_INST_NOP;
return ftrace_modify_code(ip, old, new);
- }
+ } else if (core_kernel_text(ip))
+ return __ftrace_make_nop_kernel(rec, addr);
#ifdef CONFIG_MODULES
/*
@@ -456,6 +611,53 @@
#endif /* CONFIG_PPC64 */
#endif /* CONFIG_MODULES */
+static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op;
+ void *ip = (void *)rec->ip;
+ unsigned long tramp, entry, ptr;
+
+ /* Make sure we're being asked to patch branch to a known ftrace addr */
+ entry = ppc_global_function_entry((void *)ftrace_caller);
+ ptr = ppc_global_function_entry((void *)addr);
+
+ if (ptr != entry) {
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ entry = ppc_global_function_entry((void *)ftrace_regs_caller);
+ if (ptr != entry) {
+#endif
+ pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr);
+ return -EINVAL;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ }
+#endif
+ }
+
+ /* Make sure we have a nop */
+ if (probe_kernel_read(&op, ip, sizeof(op))) {
+ pr_err("Unable to read ftrace location %p\n", ip);
+ return -EFAULT;
+ }
+
+ if (op != PPC_INST_NOP) {
+ pr_err("Unexpected call sequence at %p: %x\n", ip, op);
+ return -EINVAL;
+ }
+
+ tramp = find_ftrace_tramp((unsigned long)ip);
+ if (!tramp) {
+ pr_err("No ftrace trampolines reachable from %ps\n", ip);
+ return -EINVAL;
+ }
+
+ if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
+ pr_err("Error patching branch to ftrace tramp!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
@@ -471,7 +673,8 @@
old = PPC_INST_NOP;
new = ftrace_call_replace(ip, addr, 1);
return ftrace_modify_code(ip, old, new);
- }
+ } else if (core_kernel_text(ip))
+ return __ftrace_make_call_kernel(rec, addr);
#ifdef CONFIG_MODULES
/*
@@ -603,6 +806,12 @@
old = ftrace_call_replace(ip, old_addr, 1);
new = ftrace_call_replace(ip, addr, 1);
return ftrace_modify_code(ip, old, new);
+ } else if (core_kernel_text(ip)) {
+ /*
+ * We always patch out of range locations to go to the regs
+ * variant, so there is nothing to do here
+ */
+ return 0;
}
#ifdef CONFIG_MODULES
@@ -654,10 +863,50 @@
ftrace_modify_all_code(command);
}
+#ifdef CONFIG_PPC64
+#define PACATOC offsetof(struct paca_struct, kernel_toc)
+
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+
+int __init ftrace_dyn_arch_init(void)
+{
+ int i;
+ unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+ u32 stub_insns[] = {
+ 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */
+ 0x3d8c0000, /* addis r12,r12,<high> */
+ 0x398c0000, /* addi r12,r12,<low> */
+ 0x7d8903a6, /* mtctr r12 */
+ 0x4e800420, /* bctr */
+ };
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ unsigned long addr = ppc_global_function_entry((void *)ftrace_regs_caller);
+#else
+ unsigned long addr = ppc_global_function_entry((void *)ftrace_caller);
+#endif
+ long reladdr = addr - kernel_toc_addr();
+
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("Address of %ps out of range of kernel_toc.\n",
+ (void *)addr);
+ return -1;
+ }
+
+ for (i = 0; i < 2; i++) {
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][1] |= PPC_HA(reladdr);
+ tramp[i][2] |= PPC_LO(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+
+ return 0;
+}
+#else
int __init ftrace_dyn_arch_init(void)
{
return 0;
}
+#endif
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -695,7 +944,8 @@
* Hook the return address and push it in the stack of return addrs
* in current thread info. Return the address we want to divert to.
*/
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+ unsigned long sp)
{
unsigned long return_hooker;
@@ -707,20 +957,13 @@
return_hooker = ppc_function_entry(return_to_handler);
- if (!function_graph_enter(parent, ip, 0, NULL))
+ if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
parent = return_hooker;
out:
return parent;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64)
-unsigned long __init arch_syscall_addr(int nr)
-{
- return sys_call_table[nr*2];
-}
-#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */
-
#ifdef PPC64_ELF_ABI_v1
char *arch_ftrace_match_adjust(char *str, const char *search)
{
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
index 2c29098..e023ae5 100644
--- a/arch/powerpc/kernel/trace/ftrace_32.S
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Split from entry_32.S
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/magic.h>
@@ -54,6 +50,7 @@
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ addi r5, r1, 48
/* load r4 with local address */
lwz r4, 44(r1)
subi r4, r4, MCOUNT_INSN_SIZE
diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S
index e25f77c..25e5b9e 100644
--- a/arch/powerpc/kernel/trace/ftrace_64.S
+++ b/arch/powerpc/kernel/trace/ftrace_64.S
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Split from entry_64.S
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/magic.h>
@@ -14,6 +10,18 @@
#include <asm/ppc-opcode.h>
#include <asm/export.h>
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+ .space 64
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+ .space 64
+.popsection
+
_GLOBAL(mcount)
_GLOBAL(_mcount)
EXPORT_SYMBOL(_mcount)
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 32476a6..f9fd5f7 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Split from ftrace_64.S
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/magic.h>
@@ -229,7 +225,7 @@
* - r0, r11 & r12 are free
*/
livepatch_handler:
- CURRENT_THREAD_INFO(r12, r1)
+ ld r12, PACA_THREAD_INFO(r13)
/* Allocate 3 x 8 bytes */
ld r11, TI_livepatch_sp(r12)
@@ -256,7 +252,7 @@
* restore it.
*/
- CURRENT_THREAD_INFO(r12, r1)
+ ld r12, PACA_THREAD_INFO(r13)
ld r11, TI_livepatch_sp(r12)
@@ -273,7 +269,7 @@
ld r2, -24(r11)
/* Pop livepatch stack frame */
- CURRENT_THREAD_INFO(r12, r1)
+ ld r12, PACA_THREAD_INFO(r13)
subi r11, r11, 24
std r11, TI_livepatch_sp(r12)
@@ -298,6 +294,7 @@
std r2, 24(r1)
ld r2, PACATOC(r13) /* get kernel TOC in r2 */
+ addi r5, r1, 112
mfctr r4 /* ftrace_caller has moved local addr here */
std r4, 40(r1)
mflr r3 /* ftrace_caller has restored LR from stack */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S
index 4c515c4..6708e24 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Split from ftrace_64.S
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/magic.h>
@@ -45,6 +41,7 @@
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ addi r5, r1, 112
/* load r4 with local address */
ld r4, 128(r1)
subi r4, r4, MCOUNT_INSN_SIZE
diff --git a/arch/powerpc/kernel/trace/trace_clock.c b/arch/powerpc/kernel/trace/trace_clock.c
index 4917069..b0143a3 100644
--- a/arch/powerpc/kernel/trace/trace_clock.c
+++ b/arch/powerpc/kernel/trace/trace_clock.c
@@ -1,7 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*
* Copyright (C) 2015 Naveen N. Rao, IBM Corporation
*/
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 8689a02..82f4353 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
* Copyright 2007-2010 Freescale Semiconductor, Inc.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Modified by Cort Dougan (cort@cs.nmt.edu)
* and Paul Mackerras (paulus@samba.org)
*/
@@ -71,6 +67,7 @@
#include <sysdev/fsl_pci.h>
#include <asm/kprobes.h>
#include <asm/stacktrace.h>
+#include <asm/nmi.h>
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
int (*__debugger)(struct pt_regs *regs) __read_mostly;
@@ -178,7 +175,7 @@
kmsg_dump(KMSG_DUMP_PANIC);
bust_spinlocks(0);
debug_locks_off();
- console_flush_on_panic();
+ console_flush_on_panic(CONSOLE_FLUSH_PENDING);
}
static unsigned long oops_begin(struct pt_regs *regs)
@@ -247,8 +244,6 @@
mdelay(MSEC_PER_SEC);
}
- if (in_interrupt())
- panic("Fatal exception in interrupt");
if (panic_on_oops)
panic("Fatal exception");
do_exit(signr);
@@ -259,24 +254,17 @@
{
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
- if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
- printk("LE ");
- else
- printk("BE ");
-
- if (IS_ENABLED(CONFIG_PREEMPT))
- pr_cont("PREEMPT ");
-
- if (IS_ENABLED(CONFIG_SMP))
- pr_cont("SMP NR_CPUS=%d ", NR_CPUS);
-
- if (debug_pagealloc_enabled())
- pr_cont("DEBUG_PAGEALLOC ");
-
- if (IS_ENABLED(CONFIG_NUMA))
- pr_cont("NUMA ");
-
- pr_cont("%s\n", ppc_md.name ? ppc_md.name : "");
+ printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s%s %s\n",
+ IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
+ PAGE_SIZE / 1024,
+ early_radix_enabled() ? " MMU=Radix" : "",
+ early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "",
+ IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+ IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
+ IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
+ debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
+ IS_ENABLED(CONFIG_NUMA) ? " NUMA" : "",
+ ppc_md.name ? ppc_md.name : "");
if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
return 1;
@@ -307,12 +295,9 @@
}
NOKPROBE_SYMBOL(die);
-void user_single_step_siginfo(struct task_struct *tsk,
- struct pt_regs *regs, siginfo_t *info)
+void user_single_step_report(struct pt_regs *regs)
{
- info->si_signo = SIGTRAP;
- info->si_code = TRAP_TRACE;
- info->si_addr = (void __user *)regs->nip;
+ force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip);
}
static void show_signal_msg(int signr, struct pt_regs *regs, int code,
@@ -341,14 +326,12 @@
show_user_instructions(regs);
}
-void _exception_pkey(int signr, struct pt_regs *regs, int code,
- unsigned long addr, int key)
+static bool exception_common(int signr, struct pt_regs *regs, int code,
+ unsigned long addr)
{
- siginfo_t info;
-
if (!user_mode(regs)) {
die("Exception in kernel mode", regs, signr);
- return;
+ return false;
}
show_signal_msg(signr, regs, code, addr);
@@ -364,30 +347,120 @@
*/
thread_pkey_regs_save(¤t->thread);
- clear_siginfo(&info);
- info.si_signo = signr;
- info.si_code = code;
- info.si_addr = (void __user *) addr;
- info.si_pkey = key;
+ return true;
+}
- force_sig_info(signr, &info, current);
+void _exception_pkey(struct pt_regs *regs, unsigned long addr, int key)
+{
+ if (!exception_common(SIGSEGV, regs, SEGV_PKUERR, addr))
+ return;
+
+ force_sig_pkuerr((void __user *) addr, key);
}
void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
{
- _exception_pkey(signr, regs, code, addr, 0);
+ if (!exception_common(signr, regs, code, addr))
+ return;
+
+ force_sig_fault(signr, code, (void __user *)addr);
+}
+
+/*
+ * The interrupt architecture has a quirk in that the HV interrupts excluding
+ * the NMIs (0x100 and 0x200) do not clear MSR[RI] at entry. The first thing
+ * that an interrupt handler must do is save off a GPR into a scratch register,
+ * and all interrupts on POWERNV (HV=1) use the HSPRG1 register as scratch.
+ * Therefore an NMI can clobber an HV interrupt's live HSPRG1 without noticing
+ * that it is non-reentrant, which leads to random data corruption.
+ *
+ * The solution is for NMI interrupts in HV mode to check if they originated
+ * from these critical HV interrupt regions. If so, then mark them not
+ * recoverable.
+ *
+ * An alternative would be for HV NMIs to use SPRG for scratch to avoid the
+ * HSPRG1 clobber, however this would cause guest SPRG to be clobbered. Linux
+ * guests should always have MSR[RI]=0 when its scratch SPRG is in use, so
+ * that would work. However any other guest OS that may have the SPRG live
+ * and MSR[RI]=1 could encounter silent corruption.
+ *
+ * Builds that do not support KVM could take this second option to increase
+ * the recoverability of NMIs.
+ */
+void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_POWERNV
+ unsigned long kbase = (unsigned long)_stext;
+ unsigned long nip = regs->nip;
+
+ if (!(regs->msr & MSR_RI))
+ return;
+ if (!(regs->msr & MSR_HV))
+ return;
+ if (regs->msr & MSR_PR)
+ return;
+
+ /*
+ * Now test if the interrupt has hit a range that may be using
+ * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
+ * problem ranges all run un-relocated. Test real and virt modes
+ * at the same time by droping the high bit of the nip (virt mode
+ * entry points still have the +0x4000 offset).
+ */
+ nip &= ~0xc000000000000000ULL;
+ if ((nip >= 0x500 && nip < 0x600) || (nip >= 0x4500 && nip < 0x4600))
+ goto nonrecoverable;
+ if ((nip >= 0x980 && nip < 0xa00) || (nip >= 0x4980 && nip < 0x4a00))
+ goto nonrecoverable;
+ if ((nip >= 0xe00 && nip < 0xec0) || (nip >= 0x4e00 && nip < 0x4ec0))
+ goto nonrecoverable;
+ if ((nip >= 0xf80 && nip < 0xfa0) || (nip >= 0x4f80 && nip < 0x4fa0))
+ goto nonrecoverable;
+
+ /* Trampoline code runs un-relocated so subtract kbase. */
+ if (nip >= (unsigned long)(start_real_trampolines - kbase) &&
+ nip < (unsigned long)(end_real_trampolines - kbase))
+ goto nonrecoverable;
+ if (nip >= (unsigned long)(start_virt_trampolines - kbase) &&
+ nip < (unsigned long)(end_virt_trampolines - kbase))
+ goto nonrecoverable;
+ return;
+
+nonrecoverable:
+ regs->msr &= ~MSR_RI;
+#endif
}
void system_reset_exception(struct pt_regs *regs)
{
+ unsigned long hsrr0, hsrr1;
+ bool nested = in_nmi();
+ bool saved_hsrrs = false;
+
/*
* Avoid crashes in case of nested NMI exceptions. Recoverability
* is determined by RI and in_nmi
*/
- bool nested = in_nmi();
if (!nested)
nmi_enter();
+ /*
+ * System reset can interrupt code where HSRRs are live and MSR[RI]=1.
+ * The system reset interrupt itself may clobber HSRRs (e.g., to call
+ * OPAL), so save them here and restore them before returning.
+ *
+ * Machine checks don't need to save HSRRs, as the real mode handler
+ * is careful to avoid them, and the regular handler is not delivered
+ * as an NMI.
+ */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ hsrr0 = mfspr(SPRN_HSRR0);
+ hsrr1 = mfspr(SPRN_HSRR1);
+ saved_hsrrs = true;
+ }
+
+ hv_nmi_check_nonrecoverable(regs);
+
__this_cpu_inc(irq_stat.sreset_irqs);
/* See if any machine dependent calls */
@@ -399,6 +472,7 @@
if (debugger(regs))
goto out;
+ kmsg_dump(KMSG_DUMP_OOPS);
/*
* A system reset is a request to dump, so we always send
* it through the crashdump code (if fadump or kdump are
@@ -435,6 +509,11 @@
if (!(regs->msr & MSR_RI))
nmi_panic(regs, "Unrecoverable System Reset");
+ if (saved_hsrrs) {
+ mtspr(SPRN_HSRR0, hsrr0);
+ mtspr(SPRN_HSRR1, hsrr1);
+ }
+
if (!nested)
nmi_exit();
@@ -535,10 +614,10 @@
printk("Caused by (from MCSR=%lx): ", reason);
if (reason & MCSR_MCP)
- printk("Machine Check Signal\n");
+ pr_cont("Machine Check Signal\n");
if (reason & MCSR_ICPERR) {
- printk("Instruction Cache Parity Error\n");
+ pr_cont("Instruction Cache Parity Error\n");
/*
* This is recoverable by invalidating the i-cache.
@@ -556,7 +635,7 @@
}
if (reason & MCSR_DCPERR_MC) {
- printk("Data Cache Parity Error\n");
+ pr_cont("Data Cache Parity Error\n");
/*
* In write shadow mode we auto-recover from the error, but it
@@ -575,38 +654,38 @@
}
if (reason & MCSR_L2MMU_MHIT) {
- printk("Hit on multiple TLB entries\n");
+ pr_cont("Hit on multiple TLB entries\n");
recoverable = 0;
}
if (reason & MCSR_NMI)
- printk("Non-maskable interrupt\n");
+ pr_cont("Non-maskable interrupt\n");
if (reason & MCSR_IF) {
- printk("Instruction Fetch Error Report\n");
+ pr_cont("Instruction Fetch Error Report\n");
recoverable = 0;
}
if (reason & MCSR_LD) {
- printk("Load Error Report\n");
+ pr_cont("Load Error Report\n");
recoverable = 0;
}
if (reason & MCSR_ST) {
- printk("Store Error Report\n");
+ pr_cont("Store Error Report\n");
recoverable = 0;
}
if (reason & MCSR_LDG) {
- printk("Guarded Load Error Report\n");
+ pr_cont("Guarded Load Error Report\n");
recoverable = 0;
}
if (reason & MCSR_TLBSYNC)
- printk("Simultaneous tlbsync operations\n");
+ pr_cont("Simultaneous tlbsync operations\n");
if (reason & MCSR_BSL2_ERR) {
- printk("Level 2 Cache Error\n");
+ pr_cont("Level 2 Cache Error\n");
recoverable = 0;
}
@@ -616,7 +695,7 @@
addr = mfspr(SPRN_MCAR);
addr |= (u64)mfspr(SPRN_MCARU) << 32;
- printk("Machine Check %s Address: %#llx\n",
+ pr_cont("Machine Check %s Address: %#llx\n",
reason & MCSR_MEA ? "Effective" : "Physical", addr);
}
@@ -640,29 +719,29 @@
printk("Caused by (from MCSR=%lx): ", reason);
if (reason & MCSR_MCP)
- printk("Machine Check Signal\n");
+ pr_cont("Machine Check Signal\n");
if (reason & MCSR_ICPERR)
- printk("Instruction Cache Parity Error\n");
+ pr_cont("Instruction Cache Parity Error\n");
if (reason & MCSR_DCP_PERR)
- printk("Data Cache Push Parity Error\n");
+ pr_cont("Data Cache Push Parity Error\n");
if (reason & MCSR_DCPERR)
- printk("Data Cache Parity Error\n");
+ pr_cont("Data Cache Parity Error\n");
if (reason & MCSR_BUS_IAERR)
- printk("Bus - Instruction Address Error\n");
+ pr_cont("Bus - Instruction Address Error\n");
if (reason & MCSR_BUS_RAERR)
- printk("Bus - Read Address Error\n");
+ pr_cont("Bus - Read Address Error\n");
if (reason & MCSR_BUS_WAERR)
- printk("Bus - Write Address Error\n");
+ pr_cont("Bus - Write Address Error\n");
if (reason & MCSR_BUS_IBERR)
- printk("Bus - Instruction Data Error\n");
+ pr_cont("Bus - Instruction Data Error\n");
if (reason & MCSR_BUS_RBERR)
- printk("Bus - Read Data Bus Error\n");
+ pr_cont("Bus - Read Data Bus Error\n");
if (reason & MCSR_BUS_WBERR)
- printk("Bus - Write Data Bus Error\n");
+ pr_cont("Bus - Write Data Bus Error\n");
if (reason & MCSR_BUS_IPERR)
- printk("Bus - Instruction Parity Error\n");
+ pr_cont("Bus - Instruction Parity Error\n");
if (reason & MCSR_BUS_RPERR)
- printk("Bus - Read Parity Error\n");
+ pr_cont("Bus - Read Parity Error\n");
return 0;
}
@@ -680,19 +759,19 @@
printk("Caused by (from MCSR=%lx): ", reason);
if (reason & MCSR_MCP)
- printk("Machine Check Signal\n");
+ pr_cont("Machine Check Signal\n");
if (reason & MCSR_CP_PERR)
- printk("Cache Push Parity Error\n");
+ pr_cont("Cache Push Parity Error\n");
if (reason & MCSR_CPERR)
- printk("Cache Parity Error\n");
+ pr_cont("Cache Parity Error\n");
if (reason & MCSR_EXCP_ERR)
- printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
+ pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
if (reason & MCSR_BUS_IRERR)
- printk("Bus - Read Bus Error on instruction fetch\n");
+ pr_cont("Bus - Read Bus Error on instruction fetch\n");
if (reason & MCSR_BUS_DRERR)
- printk("Bus - Read Bus Error on data load\n");
+ pr_cont("Bus - Read Bus Error on data load\n");
if (reason & MCSR_BUS_WRERR)
- printk("Bus - Write Bus Error on buffered store or cache line push\n");
+ pr_cont("Bus - Write Bus Error on buffered store or cache line push\n");
return 0;
}
@@ -705,30 +784,30 @@
printk("Caused by (from SRR1=%lx): ", reason);
switch (reason & 0x601F0000) {
case 0x80000:
- printk("Machine check signal\n");
+ pr_cont("Machine check signal\n");
break;
case 0: /* for 601 */
case 0x40000:
case 0x140000: /* 7450 MSS error and TEA */
- printk("Transfer error ack signal\n");
+ pr_cont("Transfer error ack signal\n");
break;
case 0x20000:
- printk("Data parity error signal\n");
+ pr_cont("Data parity error signal\n");
break;
case 0x10000:
- printk("Address parity error signal\n");
+ pr_cont("Address parity error signal\n");
break;
case 0x20000000:
- printk("L1 Data Cache error\n");
+ pr_cont("L1 Data Cache error\n");
break;
case 0x40000000:
- printk("L1 Instruction Cache error\n");
+ pr_cont("L1 Instruction Cache error\n");
break;
case 0x00100000:
- printk("L2 data cache parity error\n");
+ pr_cont("L2 data cache parity error\n");
break;
default:
- printk("Unknown values in msr\n");
+ pr_cont("Unknown values in msr\n");
}
return 0;
}
@@ -741,9 +820,7 @@
if (!nested)
nmi_enter();
- /* 64s accounts the mce in machine_check_early when in HVMODE */
- if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE))
- __this_cpu_inc(irq_stat.mce_exceptions);
+ __this_cpu_inc(irq_stat.mce_exceptions);
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -767,15 +844,15 @@
if (check_io_access(regs))
goto bail;
- /* Must die if the interrupt is not recoverable */
- if (!(regs->msr & MSR_RI))
- nmi_panic(regs, "Unrecoverable Machine check");
-
if (!nested)
nmi_exit();
die("Machine check", regs, SIGBUS);
+ /* Must die if the interrupt is not recoverable */
+ if (!(regs->msr & MSR_RI))
+ nmi_panic(regs, "Unrecoverable Machine check");
+
return;
bail:
@@ -841,7 +918,7 @@
addr = (__force const void __user *)ea;
/* Check it */
- if (!access_ok(VERIFY_READ, addr, 16)) {
+ if (!access_ok(addr, 16)) {
pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
" instr=%08x addr=%016lx\n",
smp_processor_id(), current->comm, current->pid,
@@ -1438,7 +1515,8 @@
goto bail;
} else {
printk(KERN_EMERG "Unexpected TM Bad Thing exception "
- "at %lx (msr 0x%x)\n", regs->nip, reason);
+ "at %lx (msr 0x%lx) tm_scratch=%llx\n",
+ regs->nip, regs->msr, get_paca()->tm_scratch);
die("Unrecoverable exception", regs, SIGABRT);
}
}
@@ -1545,21 +1623,13 @@
void StackOverflow(struct pt_regs *regs)
{
- printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
- current, regs->gpr[1]);
+ pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n",
+ current->comm, task_pid_nr(current), regs->gpr[1]);
debugger(regs);
show_regs(regs);
panic("kernel stack overflow");
}
-void nonrecoverable_exception(struct pt_regs *regs)
-{
- printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n",
- regs->nip, regs->msr);
- debugger(regs);
- die("nonrecoverable exception", regs, SIGKILL);
-}
-
void kernel_fp_unavailable_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
@@ -1755,16 +1825,20 @@
* checkpointed FP registers need to be loaded.
*/
tm_reclaim_current(TM_CAUSE_FAC_UNAV);
- /* Reclaim didn't save out any FPRs to transact_fprs. */
+
+ /*
+ * Reclaim initially saved out bogus (lazy) FPRs to ckfp_state, and
+ * then it was overwrite by the thr->fp_state by tm_reclaim_thread().
+ *
+ * At this point, ck{fp,vr}_state contains the exact values we want to
+ * recheckpoint.
+ */
/* Enable FP for the task: */
current->thread.load_fp = 1;
- /* This loads and recheckpoints the FP registers from
- * thread.fpr[]. They will remain in registers after the
- * checkpoint so we don't need to reload them after.
- * If VMX is in use, the VRs now hold checkpointed values,
- * so we don't want to load the VRs from the thread_struct.
+ /*
+ * Recheckpoint all the checkpointed ckpt, ck{fp, vr}_state registers.
*/
tm_recheckpoint(¤t->thread);
}
@@ -2011,6 +2085,10 @@
int code = FPE_FLTUNK;
int err;
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
flush_spe_to_thread(current);
spefscr = current->thread.spefscr;
@@ -2056,6 +2134,10 @@
extern int speround_handler(struct pt_regs *regs);
int err;
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
preempt_disable();
if (regs->msr & MSR_SPE)
giveup_spe(current);
@@ -2091,8 +2173,8 @@
*/
void unrecoverable_exception(struct pt_regs *regs)
{
- printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n",
- regs->trap, regs->nip);
+ pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
+ regs->trap, regs->nip, regs->msr);
die("Unrecoverable exception", regs, SIGABRT);
}
NOKPROBE_SYMBOL(unrecoverable_exception);
diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S
new file mode 100644
index 0000000..07296bc
--- /dev/null
+++ b/arch/powerpc/kernel/ucall.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generic code to perform an ultravisor call.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include <asm/ppc_asm.h>
+#include <asm/export.h>
+
+_GLOBAL(ucall_norets)
+EXPORT_SYMBOL_GPL(ucall_norets)
+ sc 2 /* Invoke the ultravisor */
+ blr /* Return r3 = status */
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 7cc38b5..a384e7c 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* polling mode stateless debugging stuff, originally for NS16550 Serial Ports
*
* c 2001 PPC 64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stdarg.h>
@@ -74,7 +70,7 @@
#endif
#ifdef CONFIG_PPC_EARLY_DEBUG
- console_loglevel = 10;
+ console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
register_early_udbg_console();
#endif
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 411116c..9356b60 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* udbg for NS16550 compatible serial ports
*
* Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
#include <asm/udbg.h>
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 5d105b8..1cfef0e 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* User-space Probes (UProbes) for powerpc
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright IBM Corporation, 2007-2012
*
* Adapted from the x86 port by Ananth N Mavinakayanahalli <ananth@in.ibm.com>
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 65b3bdb..eae9dda 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
* <benh@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
@@ -98,28 +94,6 @@
CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
},
-#ifdef CONFIG_PPC32
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_gettimeofday", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_clock_gettime", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_clock_getres", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_get_tbfreq", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_time", NULL
- },
-#endif
};
/*
@@ -671,15 +645,18 @@
{
unsigned int i;
extern unsigned long *sys_call_table;
+#ifdef CONFIG_PPC64
+ extern unsigned long *compat_sys_call_table;
+#endif
extern unsigned long sys_ni_syscall;
for (i = 0; i < NR_syscalls; i++) {
#ifdef CONFIG_PPC64
- if (sys_call_table[i*2] != sys_ni_syscall)
+ if (sys_call_table[i] != sys_ni_syscall)
vdso_data->syscall_map_64[i >> 5] |=
0x80000000UL >> (i & 0x1f);
- if (sys_call_table[i*2+1] != sys_ni_syscall)
+ if (compat_sys_call_table[i] != sys_ni_syscall)
vdso_data->syscall_map_32[i >> 5] |=
0x80000000UL >> (i & 0x1f);
#else /* CONFIG_PPC64 */
@@ -795,7 +772,6 @@
BUG_ON(vdso32_pagelist == NULL);
for (i = 0; i < vdso32_pages; i++) {
struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
- ClearPageReserved(pg);
get_page(pg);
vdso32_pagelist[i] = pg;
}
@@ -809,7 +785,6 @@
BUG_ON(vdso64_pagelist == NULL);
for (i = 0; i < vdso64_pages; i++) {
struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
- ClearPageReserved(pg);
get_page(pg);
vdso64_pagelist[i] = pg;
}
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 50112d4..06f54d9 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -23,11 +23,11 @@
obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
+ -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both
asflags-y := -D__VDSO32__ -s
obj-y += vdso32_wrapper.o
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
index 1ba6feb..7f882e7 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* vDSO provided cache flush routines
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
* IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 3745113..6c7401b 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Access to the shared data page by the vDSO & syscall map
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
@@ -37,6 +33,7 @@
mtlr r0
addi r3, r3, __kernel_datapage_offset-data_page_branch
lwz r0,0(r3)
+ .cfi_restore lr
add r3,r0,r3
blr
.cfi_endproc
@@ -73,6 +70,7 @@
*
* returns the timebase frequency in HZ
*/
+#ifndef CONFIG_PPC_BOOK3S_601
V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
@@ -85,3 +83,4 @@
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_tbfreq)
+#endif
diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S
index c62be60..63e9145 100644
--- a/arch/powerpc/kernel/vdso32/getcpu.S
+++ b/arch/powerpc/kernel/vdso32/getcpu.S
@@ -1,17 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 769c262..becd9f8 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Userland implementation of gettimeofday() for 32 bits processes in a
* ppc64 kernel for use in the vDSO
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org,
* IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
@@ -98,7 +94,7 @@
* can be used, r7 contains NSEC_PER_SEC.
*/
- lwz r5,WTOM_CLOCK_SEC(r9)
+ lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
lwz r6,WTOM_CLOCK_NSEC(r9)
/* We now have our offset in r5,r6. We create a fake dependency
@@ -139,6 +135,7 @@
*/
99:
li r0,__NR_clock_gettime
+ .cfi_restore lr
sc
blr
.cfi_endproc
diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso32/sigtramp.S
index cf0c9c9..0bcc5e5 100644
--- a/arch/powerpc/kernel/vdso32/sigtramp.S
+++ b/arch/powerpc/kernel/vdso32/sigtramp.S
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Signal trampolines for 32 bits processes in a ppc64 kernel for
* use in the vDSO
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
* Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 099a6db..00c025b 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -144,10 +144,13 @@
__kernel_datapage_offset;
__kernel_get_syscall_map;
+#ifndef CONFIG_PPC_BOOK3S_601
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
+ __kernel_time;
__kernel_get_tbfreq;
+#endif
__kernel_sync_dicache;
__kernel_sync_dicache_p5;
__kernel_sigtramp32;
@@ -155,7 +158,6 @@
#ifdef CONFIG_PPC64
__kernel_getcpu;
#endif
- __kernel_time;
local: *;
};
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 69cecb3..32ebb35 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -9,11 +9,11 @@
obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
+ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
asflags-y := -D__VDSO64__ -s
obj-y += vdso64_wrapper.o
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
index 69c5af2..3f92561 100644
--- a/arch/powerpc/kernel/vdso64/cacheflush.S
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* vDSO provided cache flush routines
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
* IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
index abf17fe..dc84f5a 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Access to the shared data page by the vDSO & syscall map
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
@@ -37,6 +33,7 @@
mtlr r0
addi r3, r3, __kernel_datapage_offset-data_page_branch
lwz r0,0(r3)
+ .cfi_restore lr
add r3,r0,r3
blr
.cfi_endproc
diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S
index 23eb9a9..12bbf23 100644
--- a/arch/powerpc/kernel/vdso64/getcpu.S
+++ b/arch/powerpc/kernel/vdso64/getcpu.S
@@ -1,17 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index c002adc..07bfe33 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Userland implementation of gettimeofday() for 64 bits processes in a
* ppc64 kernel for use in the vDSO
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
* IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
@@ -92,7 +88,7 @@
* At this point, r4,r5 contain our sec/nsec values.
*/
- lwa r6,WTOM_CLOCK_SEC(r3)
+ ld r6,WTOM_CLOCK_SEC(r3)
lwa r9,WTOM_CLOCK_NSEC(r3)
/* We now have our result in r6,r9. We create a fake dependency
@@ -125,7 +121,7 @@
bne cr6,75f
/* CLOCK_MONOTONIC_COARSE */
- lwa r6,WTOM_CLOCK_SEC(r3)
+ ld r6,WTOM_CLOCK_SEC(r3)
lwa r9,WTOM_CLOCK_NSEC(r3)
/* check if counter has updated */
@@ -169,6 +165,7 @@
*/
99:
li r0,__NR_clock_gettime
+ .cfi_restore lr
sc
blr
.cfi_endproc
diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S
index 542c6f4..a8cc040 100644
--- a/arch/powerpc/kernel/vdso64/sigtramp.S
+++ b/arch/powerpc/kernel/vdso64/sigtramp.S
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Signal trampoline for 64 bits processes in a ppc64 kernel for
* use in the vDSO
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
* Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 21165da..8eb867d 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -21,6 +21,7 @@
REST_32VRS(0,r4,r3)
blr
EXPORT_SYMBOL(load_vr_state)
+_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */
/*
* Store VMX state into memory, including VSCR.
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 07ae018..060a1ac 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -4,16 +4,16 @@
#else
#define PROVIDE32(x) PROVIDE(x)
#endif
+
+#define BSS_FIRST_SECTIONS *(.bss.prominit)
+
#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
-#if defined(CONFIG_STRICT_KERNEL_RWX) && !defined(CONFIG_PPC32)
-#define STRICT_ALIGN_SIZE (1 << 24)
-#else
-#define STRICT_ALIGN_SIZE PAGE_SIZE
-#endif
+#define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT)
+#define ETEXT_ALIGN_SIZE (1 << CONFIG_ETEXT_SHIFT)
ENTRY(_stext)
@@ -83,11 +83,11 @@
#ifdef CONFIG_PPC64
/*
- * BLOCK(0) overrides the default output section alignment because
+ * ALIGN(0) overrides the default output section alignment because
* this needs to start right after .head.text in order for fixed
* section placement to work.
*/
- .text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) {
+ .text ALIGN(0) : AT(ADDR(.text) - LOAD_OFFSET) {
#ifdef CONFIG_LD_HEAD_STUB_CATCH
KEEP(*(.linker_stub_catch));
. = . ;
@@ -99,6 +99,9 @@
#endif
/* careful! __ftr_alt_* sections need to be close to .text */
*(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
+#ifdef CONFIG_PPC64
+ *(.tramp.ftrace.text);
+#endif
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
@@ -125,7 +128,7 @@
} :kernel
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(ETEXT_ALIGN_SIZE);
_etext = .;
PROVIDE32 (etext = .);
@@ -164,6 +167,14 @@
}
#endif /* CONFIG_PPC_BARRIER_NOSPEC */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+ . = ALIGN(8);
+ __spec_btb_flush_fixup : AT(ADDR(__spec_btb_flush_fixup) - LOAD_OFFSET) {
+ __start__btb_flush_fixup = .;
+ *(__btb_flush_fixup)
+ __stop__btb_flush_fixup = .;
+ }
+#endif
EXCEPTION_TABLE(0)
NOTES :kernel :notes
@@ -181,7 +192,15 @@
*/
. = ALIGN(STRICT_ALIGN_SIZE);
__init_begin = .;
- INIT_TEXT_SECTION(PAGE_SIZE) :kernel
+ . = ALIGN(PAGE_SIZE);
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+ _sinittext = .;
+ INIT_TEXT
+ _einittext = .;
+#ifdef CONFIG_PPC64
+ *(.tramp.ftrace.init);
+#endif
+ } :kernel
/* .exit.text is discarded at runtime, not link time,
* to deal with references from __bug_table
@@ -192,12 +211,6 @@
.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
INIT_DATA
- __vtop_table_begin = .;
- KEEP(*(.vtop_fixup));
- __vtop_table_end = .;
- __ptov_table_begin = .;
- KEEP(*(.ptov_fixup));
- __ptov_table_end = .;
}
.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
@@ -212,8 +225,6 @@
CON_INITCALL
}
- SECURITY_INIT
-
. = ALIGN(8);
__ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
__start___ftr_fixup = .;
@@ -296,11 +307,16 @@
#ifdef CONFIG_PPC32
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
+#ifdef CONFIG_UBSAN
+ *(.data..Lubsan_data*)
+ *(.data..Lubsan_type*)
+#endif
*(.data.rel*)
*(SDATA_MAIN)
*(.sdata2)
*(.got.plt) *(.got)
*(.plt)
+ *(.branch_lt)
}
#else
.data : AT(ADDR(.data) - LOAD_OFFSET) {
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 3c6ab22..af3c15a 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -77,7 +77,7 @@
static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */
-static DEFINE_PER_CPU(struct timer_list, wd_timer);
+static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer);
static DEFINE_PER_CPU(u64, wd_timer_tb);
/* SMP checker bits */
@@ -293,21 +293,21 @@
nmi_exit();
}
-static void wd_timer_reset(unsigned int cpu, struct timer_list *t)
-{
- t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms);
- if (wd_timer_period_ms > 1000)
- t->expires = __round_jiffies_up(t->expires, cpu);
- add_timer_on(t, cpu);
-}
-
-static void wd_timer_fn(struct timer_list *t)
+static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
int cpu = smp_processor_id();
+ if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ return HRTIMER_NORESTART;
+
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return HRTIMER_NORESTART;
+
watchdog_timer_interrupt(cpu);
- wd_timer_reset(cpu, t);
+ hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms));
+
+ return HRTIMER_RESTART;
}
void arch_touch_nmi_watchdog(void)
@@ -323,37 +323,22 @@
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
-static void start_watchdog_timer_on(unsigned int cpu)
+static void start_watchdog(void *arg)
{
- struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
-
- per_cpu(wd_timer_tb, cpu) = get_tb();
-
- timer_setup(t, wd_timer_fn, TIMER_PINNED);
- wd_timer_reset(cpu, t);
-}
-
-static void stop_watchdog_timer_on(unsigned int cpu)
-{
- struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
-
- del_timer_sync(t);
-}
-
-static int start_wd_on_cpu(unsigned int cpu)
-{
+ struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+ int cpu = smp_processor_id();
unsigned long flags;
if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
WARN_ON(1);
- return 0;
+ return;
}
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
- return 0;
+ return;
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
- return 0;
+ return;
wd_smp_lock(&flags);
cpumask_set_cpu(cpu, &wd_cpus_enabled);
@@ -363,27 +348,40 @@
}
wd_smp_unlock(&flags);
- start_watchdog_timer_on(cpu);
+ *this_cpu_ptr(&wd_timer_tb) = get_tb();
- return 0;
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer->function = watchdog_timer_fn;
+ hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms),
+ HRTIMER_MODE_REL_PINNED);
}
-static int stop_wd_on_cpu(unsigned int cpu)
+static int start_watchdog_on_cpu(unsigned int cpu)
{
+ return smp_call_function_single(cpu, start_watchdog, NULL, true);
+}
+
+static void stop_watchdog(void *arg)
+{
+ struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+ int cpu = smp_processor_id();
unsigned long flags;
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
- return 0; /* Can happen in CPU unplug case */
+ return; /* Can happen in CPU unplug case */
- stop_watchdog_timer_on(cpu);
+ hrtimer_cancel(hrtimer);
wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
wd_smp_unlock(&flags);
wd_smp_clear_cpu_pending(cpu, get_tb());
+}
- return 0;
+static int stop_watchdog_on_cpu(unsigned int cpu)
+{
+ return smp_call_function_single(cpu, stop_watchdog, NULL, true);
}
static void watchdog_calc_timeouts(void)
@@ -402,7 +400,7 @@
int cpu;
for_each_cpu(cpu, &wd_cpus_enabled)
- stop_wd_on_cpu(cpu);
+ stop_watchdog_on_cpu(cpu);
}
void watchdog_nmi_start(void)
@@ -411,7 +409,7 @@
watchdog_calc_timeouts();
for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
- start_wd_on_cpu(cpu);
+ start_watchdog_on_cpu(cpu);
}
/*
@@ -423,7 +421,8 @@
err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
"powerpc/watchdog:online",
- start_wd_on_cpu, stop_wd_on_cpu);
+ start_watchdog_on_cpu,
+ stop_watchdog_on_cpu);
if (err < 0) {
pr_warn("could not be initialized");
return err;