Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 9d9a173..ac7a252 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -2,18 +2,31 @@
CFLAGS_init.o := -mcmodel=medany
ifdef CONFIG_FTRACE
-CFLAGS_REMOVE_init.o = -pg
+CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
endif
+KCOV_INSTRUMENT_init.o := n
+
obj-y += init.o
-obj-y += fault.o
obj-y += extable.o
-obj-y += ioremap.o
+obj-$(CONFIG_MMU) += fault.o pageattr.o
obj-y += cacheflush.o
obj-y += context.o
-obj-y += sifive_l2_cache.o
ifeq ($(CONFIG_MMU),y)
obj-$(CONFIG_SMP) += tlbflush.o
endif
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
+obj-$(CONFIG_KASAN) += kasan_init.o
+
+ifdef CONFIG_KASAN
+KASAN_SANITIZE_kasan_init.o := n
+KASAN_SANITIZE_init.o := n
+ifdef CONFIG_DEBUG_VIRTUAL
+KASAN_SANITIZE_physaddr.o := n
+endif
+endif
+
+obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index c54bd3c..89f8106 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -3,16 +3,25 @@
* Copyright (C) 2017 SiFive
*/
-#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#ifdef CONFIG_SMP
#include <asm/sbi.h>
+static void ipi_remote_fence_i(void *info)
+{
+ return local_flush_icache_all();
+}
+
void flush_icache_all(void)
{
- sbi_remote_fence_i(NULL);
+ local_flush_icache_all();
+
+ if (IS_ENABLED(CONFIG_RISCV_SBI))
+ sbi_remote_fence_i(NULL);
+ else
+ on_each_cpu(ipi_remote_fence_i, NULL, 1);
}
EXPORT_SYMBOL(flush_icache_all);
@@ -29,7 +38,7 @@
void flush_icache_mm(struct mm_struct *mm, bool local)
{
unsigned int cpu;
- cpumask_t others, hmask, *mask;
+ cpumask_t others, *mask;
preempt_disable();
@@ -47,10 +56,7 @@
*/
cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
local |= cpumask_empty(&others);
- if (mm != current->active_mm || !local) {
- riscv_cpuid_to_hartid_mask(&others, &hmask);
- sbi_remote_fence_i(hmask.bits);
- } else {
+ if (mm == current->active_mm && local) {
/*
* It's assumed that at least one strongly ordered operation is
* performed on this hart between setting a hart's cpumask bit
@@ -60,6 +66,13 @@
* with flush_icache_deferred().
*/
smp_mb();
+ } else if (IS_ENABLED(CONFIG_RISCV_SBI)) {
+ cpumask_t hartid_mask;
+
+ riscv_cpuid_to_hartid_mask(&others, &hartid_mask);
+ sbi_remote_fence_i(cpumask_bits(&hartid_mask));
+ } else {
+ on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
}
preempt_enable();
@@ -67,6 +80,7 @@
#endif /* CONFIG_SMP */
+#ifdef CONFIG_MMU
void flush_icache_pte(pte_t pte)
{
struct page *page = pte_page(pte);
@@ -74,3 +88,4 @@
if (!test_and_set_bit(PG_dcache_clean, &page->flags))
flush_icache_all();
}
+#endif /* CONFIG_MMU */
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index ca66d44..613ec81 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -58,8 +58,10 @@
cpumask_clear_cpu(cpu, mm_cpumask(prev));
cpumask_set_cpu(cpu, mm_cpumask(next));
+#ifdef CONFIG_MMU
csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE);
local_flush_tlb_all();
+#endif
flush_icache_deferred(next);
}
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index 7aed917..2fc7294 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -15,9 +15,9 @@
{
const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->sepc);
+ fixup = search_exception_tables(regs->epc);
if (fixup) {
- regs->sepc = fixup->fixup;
+ regs->epc = fixup->fixup;
return 1;
}
return 0;
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 247b8c8..3c8b9e4 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -14,168 +14,13 @@
#include <linux/signal.h>
#include <linux/uaccess.h>
-#include <asm/pgalloc.h>
#include <asm/ptrace.h>
#include <asm/tlbflush.h>
#include "../kernel/head.h"
-/*
- * This routine handles page faults. It determines the address and the
- * problem, and then passes it off to one of the appropriate routines.
- */
-asmlinkage void do_page_fault(struct pt_regs *regs)
+static inline void no_context(struct pt_regs *regs, unsigned long addr)
{
- struct task_struct *tsk;
- struct vm_area_struct *vma;
- struct mm_struct *mm;
- unsigned long addr, cause;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
- int code = SEGV_MAPERR;
- vm_fault_t fault;
-
- cause = regs->scause;
- addr = regs->sbadaddr;
-
- tsk = current;
- mm = tsk->mm;
-
- /*
- * Fault-in kernel-space virtual memory on-demand.
- * The 'reference' page table is init_mm.pgd.
- *
- * NOTE! We MUST NOT take any locks for this case. We may
- * be in an interrupt or a critical region, and should
- * only copy the information from the master page table,
- * nothing more.
- */
- if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
- goto vmalloc_fault;
-
- /* Enable interrupts if they were enabled in the parent context. */
- if (likely(regs->sstatus & SR_SPIE))
- local_irq_enable();
-
- /*
- * If we're in an interrupt, have no user context, or are running
- * in an atomic region, then we must not take the fault.
- */
- if (unlikely(faulthandler_disabled() || !mm))
- goto no_context;
-
- if (user_mode(regs))
- flags |= FAULT_FLAG_USER;
-
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
-
-retry:
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, addr);
- if (unlikely(!vma))
- goto bad_area;
- if (likely(vma->vm_start <= addr))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
- goto bad_area;
- if (unlikely(expand_stack(vma, addr)))
- goto bad_area;
-
- /*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it.
- */
-good_area:
- code = SEGV_ACCERR;
-
- switch (cause) {
- case EXC_INST_PAGE_FAULT:
- if (!(vma->vm_flags & VM_EXEC))
- goto bad_area;
- break;
- case EXC_LOAD_PAGE_FAULT:
- if (!(vma->vm_flags & VM_READ))
- goto bad_area;
- break;
- case EXC_STORE_PAGE_FAULT:
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- flags |= FAULT_FLAG_WRITE;
- break;
- default:
- panic("%s: unhandled cause %lu", __func__, cause);
- }
-
- /*
- * If for any reason at all we could not handle the fault,
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
- fault = handle_mm_fault(vma, addr, flags);
-
- /*
- * If we need to retry but a fatal signal is pending, handle the
- * signal first. We do not need to release the mmap_sem because it
- * would already be released in __lock_page_or_retry in mm/filemap.c.
- */
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk))
- return;
-
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
- }
-
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
- 1, regs, addr);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
- 1, regs, addr);
- }
- if (fault & VM_FAULT_RETRY) {
- /*
- * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
- * of starvation.
- */
- flags &= ~(FAULT_FLAG_ALLOW_RETRY);
- flags |= FAULT_FLAG_TRIED;
-
- /*
- * No need to up_read(&mm->mmap_sem) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
- goto retry;
- }
- }
-
- up_read(&mm->mmap_sem);
- return;
-
- /*
- * Something tried to access memory that isn't in our memory map.
- * Fix it, but check if it's kernel or user first.
- */
-bad_area:
- up_read(&mm->mmap_sem);
- /* User mode accesses just cause a SIGSEGV */
- if (user_mode(regs)) {
- do_trap(regs, SIGSEGV, code, addr);
- return;
- }
-
-no_context:
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
return;
@@ -190,93 +35,270 @@
"paging request", addr);
die(regs, "Oops");
do_exit(SIGKILL);
+}
- /*
- * We ran out of memory, call the OOM killer, and return the userspace
- * (which will retry the fault, or kill us if we got oom-killed).
- */
-out_of_memory:
- up_read(&mm->mmap_sem);
- if (!user_mode(regs))
- goto no_context;
- pagefault_out_of_memory();
- return;
-
-do_sigbus:
- up_read(&mm->mmap_sem);
- /* Kernel mode? Handle exceptions or die */
- if (!user_mode(regs))
- goto no_context;
- do_trap(regs, SIGBUS, BUS_ADRERR, addr);
- return;
-
-vmalloc_fault:
- {
- pgd_t *pgd, *pgd_k;
- pud_t *pud, *pud_k;
- p4d_t *p4d, *p4d_k;
- pmd_t *pmd, *pmd_k;
- pte_t *pte_k;
- int index;
-
- /* User mode accesses just cause a SIGSEGV */
- if (user_mode(regs))
- return do_trap(regs, SIGSEGV, code, addr);
-
+static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
+{
+ if (fault & VM_FAULT_OOM) {
/*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "tsk->active_mm->pgd" here.
- * We might be inside an interrupt in the middle
- * of a task switch.
+ * We ran out of memory, call the OOM killer, and return to userspace
+ * (which will retry the fault, or kill us if we got oom-killed).
*/
- index = pgd_index(addr);
- pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
- pgd_k = init_mm.pgd + index;
-
- if (!pgd_present(*pgd_k))
- goto no_context;
- set_pgd(pgd, *pgd_k);
-
- p4d = p4d_offset(pgd, addr);
- p4d_k = p4d_offset(pgd_k, addr);
- if (!p4d_present(*p4d_k))
- goto no_context;
-
- pud = pud_offset(p4d, addr);
- pud_k = pud_offset(p4d_k, addr);
- if (!pud_present(*pud_k))
- goto no_context;
-
- /*
- * Since the vmalloc area is global, it is unnecessary
- * to copy individual PTEs
- */
- pmd = pmd_offset(pud, addr);
- pmd_k = pmd_offset(pud_k, addr);
- if (!pmd_present(*pmd_k))
- goto no_context;
- set_pmd(pmd, *pmd_k);
-
- /*
- * Make sure the actual PTE exists as well to
- * catch kernel vmalloc-area accesses to non-mapped
- * addresses. If we don't do this, this will just
- * silently loop forever.
- */
- pte_k = pte_offset_kernel(pmd_k, addr);
- if (!pte_present(*pte_k))
- goto no_context;
-
- /*
- * The kernel assumes that TLBs don't cache invalid
- * entries, but in RISC-V, SFENCE.VMA specifies an
- * ordering constraint, not a cache flush; it is
- * necessary even after writing invalid entries.
- */
- local_flush_tlb_page(addr);
-
+ if (!user_mode(regs)) {
+ no_context(regs, addr);
+ return;
+ }
+ pagefault_out_of_memory();
+ return;
+ } else if (fault & VM_FAULT_SIGBUS) {
+ /* Kernel mode? Handle exceptions or die */
+ if (!user_mode(regs)) {
+ no_context(regs, addr);
+ return;
+ }
+ do_trap(regs, SIGBUS, BUS_ADRERR, addr);
return;
}
+ BUG();
+}
+
+static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+{
+ /*
+ * Something tried to access memory that isn't in our memory map.
+ * Fix it, but check if it's kernel or user first.
+ */
+ mmap_read_unlock(mm);
+ /* User mode accesses just cause a SIGSEGV */
+ if (user_mode(regs)) {
+ do_trap(regs, SIGSEGV, code, addr);
+ return;
+ }
+
+ no_context(regs, addr);
+}
+
+static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
+{
+ pgd_t *pgd, *pgd_k;
+ pud_t *pud, *pud_k;
+ p4d_t *p4d, *p4d_k;
+ pmd_t *pmd, *pmd_k;
+ pte_t *pte_k;
+ int index;
+ unsigned long pfn;
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (user_mode(regs))
+ return do_trap(regs, SIGSEGV, code, addr);
+
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "tsk->active_mm->pgd" here.
+ * We might be inside an interrupt in the middle
+ * of a task switch.
+ */
+ index = pgd_index(addr);
+ pfn = csr_read(CSR_SATP) & SATP_PPN;
+ pgd = (pgd_t *)pfn_to_virt(pfn) + index;
+ pgd_k = init_mm.pgd + index;
+
+ if (!pgd_present(*pgd_k)) {
+ no_context(regs, addr);
+ return;
+ }
+ set_pgd(pgd, *pgd_k);
+
+ p4d = p4d_offset(pgd, addr);
+ p4d_k = p4d_offset(pgd_k, addr);
+ if (!p4d_present(*p4d_k)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ pud = pud_offset(p4d, addr);
+ pud_k = pud_offset(p4d_k, addr);
+ if (!pud_present(*pud_k)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ /*
+ * Since the vmalloc area is global, it is unnecessary
+ * to copy individual PTEs
+ */
+ pmd = pmd_offset(pud, addr);
+ pmd_k = pmd_offset(pud_k, addr);
+ if (!pmd_present(*pmd_k)) {
+ no_context(regs, addr);
+ return;
+ }
+ set_pmd(pmd, *pmd_k);
+
+ /*
+ * Make sure the actual PTE exists as well to
+ * catch kernel vmalloc-area accesses to non-mapped
+ * addresses. If we don't do this, this will just
+ * silently loop forever.
+ */
+ pte_k = pte_offset_kernel(pmd_k, addr);
+ if (!pte_present(*pte_k)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ /*
+ * The kernel assumes that TLBs don't cache invalid
+ * entries, but in RISC-V, SFENCE.VMA specifies an
+ * ordering constraint, not a cache flush; it is
+ * necessary even after writing invalid entries.
+ */
+ local_flush_tlb_page(addr);
+}
+
+static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
+{
+ switch (cause) {
+ case EXC_INST_PAGE_FAULT:
+ if (!(vma->vm_flags & VM_EXEC)) {
+ return true;
+ }
+ break;
+ case EXC_LOAD_PAGE_FAULT:
+ if (!(vma->vm_flags & VM_READ)) {
+ return true;
+ }
+ break;
+ case EXC_STORE_PAGE_FAULT:
+ if (!(vma->vm_flags & VM_WRITE)) {
+ return true;
+ }
+ break;
+ default:
+ panic("%s: unhandled cause %lu", __func__, cause);
+ }
+ return false;
+}
+
+/*
+ * This routine handles page faults. It determines the address and the
+ * problem, and then passes it off to one of the appropriate routines.
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs)
+{
+ struct task_struct *tsk;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ unsigned long addr, cause;
+ unsigned int flags = FAULT_FLAG_DEFAULT;
+ int code = SEGV_MAPERR;
+ vm_fault_t fault;
+
+ cause = regs->cause;
+ addr = regs->badaddr;
+
+ tsk = current;
+ mm = tsk->mm;
+
+ /*
+ * Fault-in kernel-space virtual memory on-demand.
+ * The 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
+ */
+ if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
+ vmalloc_fault(regs, code, addr);
+ return;
+ }
+
+ /* Enable interrupts if they were enabled in the parent context. */
+ if (likely(regs->status & SR_PIE))
+ local_irq_enable();
+
+ /*
+ * If we're in an interrupt, have no user context, or are running
+ * in an atomic region, then we must not take the fault.
+ */
+ if (unlikely(faulthandler_disabled() || !mm)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
+ if (cause == EXC_STORE_PAGE_FAULT)
+ flags |= FAULT_FLAG_WRITE;
+ else if (cause == EXC_INST_PAGE_FAULT)
+ flags |= FAULT_FLAG_INSTRUCTION;
+retry:
+ mmap_read_lock(mm);
+ vma = find_vma(mm, addr);
+ if (unlikely(!vma)) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
+ if (likely(vma->vm_start <= addr))
+ goto good_area;
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
+ if (unlikely(expand_stack(vma, addr))) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
+
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it.
+ */
+good_area:
+ code = SEGV_ACCERR;
+
+ if (unlikely(access_error(cause, vma))) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
+
+ /*
+ * If for any reason at all we could not handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+ fault = handle_mm_fault(vma, addr, flags, regs);
+
+ /*
+ * If we need to retry but a fatal signal is pending, handle the
+ * signal first. We do not need to release the mmap_lock because it
+ * would already be released in __lock_page_or_retry in mm/filemap.c.
+ */
+ if (fault_signal_pending(fault, regs))
+ return;
+
+ if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
+ flags |= FAULT_FLAG_TRIED;
+
+ /*
+ * No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+ goto retry;
+ }
+
+ mmap_read_unlock(mm);
+
+ if (unlikely(fault & VM_FAULT_ERROR)) {
+ mm_fault_error(regs, addr, fault);
+ return;
+ }
+ return;
}
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index 0d4747e..932dadf 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -4,39 +4,29 @@
int pud_huge(pud_t pud)
{
- return pud_present(pud) &&
- (pud_val(pud) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
+ return pud_leaf(pud);
}
int pmd_huge(pmd_t pmd)
{
- return pmd_present(pmd) &&
- (pmd_val(pmd) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
+ return pmd_leaf(pmd);
}
-static __init int setup_hugepagesz(char *opt)
+bool __init arch_hugetlb_valid_size(unsigned long size)
{
- unsigned long ps = memparse(opt, &opt);
-
- if (ps == HPAGE_SIZE) {
- hugetlb_add_hstate(HPAGE_SHIFT - PAGE_SHIFT);
- } else if (IS_ENABLED(CONFIG_64BIT) && ps == PUD_SIZE) {
- hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
- } else {
- hugetlb_bad_size();
- pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20);
- return 0;
- }
-
- return 1;
+ if (size == HPAGE_SIZE)
+ return true;
+ else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE)
+ return true;
+ else
+ return false;
}
-__setup("hugepagesz=", setup_hugepagesz);
#ifdef CONFIG_CONTIG_ALLOC
static __init int gigantic_pages_init(void)
{
/* With CONTIG_ALLOC, we can allocate gigantic pages at runtime */
- if (IS_ENABLED(CONFIG_64BIT) && !size_to_hstate(1UL << PUD_SHIFT))
+ if (IS_ENABLED(CONFIG_64BIT))
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
return 0;
}
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index d49e334..e8921e7 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -12,12 +12,14 @@
#include <linux/sizes.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
+#include <linux/set_memory.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
-#include <asm/pgtable.h>
+#include <asm/soc.h>
#include <asm/io.h>
+#include <asm/ptdump.h>
#include "../kernel/head.h"
@@ -26,6 +28,18 @@
EXPORT_SYMBOL(empty_zero_page);
extern char _start[];
+#define DTB_EARLY_BASE_VA PGDIR_SIZE
+void *dtb_early_va __initdata;
+uintptr_t dtb_early_pa __initdata;
+
+struct pt_alloc_ops {
+ pte_t *(*get_pte_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pte)(uintptr_t va);
+#ifndef __PAGETABLE_PMD_FOLDED
+ pmd_t *(*get_pmd_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pmd)(uintptr_t va);
+#endif
+};
static void __init zone_sizes_init(void)
{
@@ -37,14 +51,45 @@
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
- free_area_init_nodes(max_zone_pfns);
+ free_area_init(max_zone_pfns);
}
-void setup_zero_page(void)
+static void setup_zero_page(void)
{
memset((void *)empty_zero_page, 0, PAGE_SIZE);
}
+#if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM)
+static inline void print_mlk(char *name, unsigned long b, unsigned long t)
+{
+ pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld kB)\n", name, b, t,
+ (((t) - (b)) >> 10));
+}
+
+static inline void print_mlm(char *name, unsigned long b, unsigned long t)
+{
+ pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld MB)\n", name, b, t,
+ (((t) - (b)) >> 20));
+}
+
+static void print_vm_layout(void)
+{
+ pr_notice("Virtual kernel memory layout:\n");
+ print_mlk("fixmap", (unsigned long)FIXADDR_START,
+ (unsigned long)FIXADDR_TOP);
+ print_mlm("pci io", (unsigned long)PCI_IO_START,
+ (unsigned long)PCI_IO_END);
+ print_mlm("vmemmap", (unsigned long)VMEMMAP_START,
+ (unsigned long)VMEMMAP_END);
+ print_mlm("vmalloc", (unsigned long)VMALLOC_START,
+ (unsigned long)VMALLOC_END);
+ print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
+ (unsigned long)high_memory);
+}
+#else
+static void print_vm_layout(void) { }
+#endif /* CONFIG_DEBUG_VM */
+
void __init mem_init(void)
{
#ifdef CONFIG_FLATMEM
@@ -55,24 +100,46 @@
memblock_free_all();
mem_init_print_info(NULL);
+ print_vm_layout();
}
#ifdef CONFIG_BLK_DEV_INITRD
static void __init setup_initrd(void)
{
+ phys_addr_t start;
unsigned long size;
- if (initrd_start >= initrd_end) {
- pr_info("initrd not found or empty");
- goto disable;
- }
- if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) {
- pr_err("initrd extends beyond end of memory");
+ /* Ignore the virtual address computed during device tree parsing */
+ initrd_start = initrd_end = 0;
+
+ if (!phys_initrd_size)
+ return;
+ /*
+ * Round the memory region to page boundaries as per free_initrd_mem()
+ * This allows us to detect whether the pages overlapping the initrd
+ * are in use, but more importantly, reserves the entire set of pages
+ * as we don't want these pages allocated for other purposes.
+ */
+ start = round_down(phys_initrd_start, PAGE_SIZE);
+ size = phys_initrd_size + (phys_initrd_start - start);
+ size = round_up(size, PAGE_SIZE);
+
+ if (!memblock_is_region_memory(start, size)) {
+ pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region",
+ (u64)start, size);
goto disable;
}
- size = initrd_end - initrd_start;
- memblock_reserve(__pa(initrd_start), size);
+ if (memblock_is_region_reserved(start, size)) {
+ pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region\n",
+ (u64)start, size);
+ goto disable;
+ }
+
+ memblock_reserve(start, size);
+ /* Now convert initrd to virtual addresses */
+ initrd_start = (unsigned long)__va(phys_initrd_start);
+ initrd_end = initrd_start + phys_initrd_size;
initrd_below_start_ok = 1;
pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n",
@@ -85,37 +152,46 @@
}
#endif /* CONFIG_BLK_DEV_INITRD */
-static phys_addr_t dtb_early_pa __initdata;
-
void __init setup_bootmem(void)
{
- struct memblock_region *reg;
- phys_addr_t mem_size = 0;
- phys_addr_t vmlinux_end = __pa(&_end);
- phys_addr_t vmlinux_start = __pa(&_start);
+ phys_addr_t mem_start = 0;
+ phys_addr_t start, dram_end, end = 0;
+ phys_addr_t vmlinux_end = __pa_symbol(&_end);
+ phys_addr_t vmlinux_start = __pa_symbol(&_start);
+ phys_addr_t max_mapped_addr = __pa(~(ulong)0);
+ u64 i;
/* Find the memory region containing the kernel */
- for_each_memblock(memory, reg) {
- phys_addr_t end = reg->base + reg->size;
-
- if (reg->base <= vmlinux_start && vmlinux_end <= end) {
- mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET);
-
- /*
- * Remove memblock from the end of usable area to the
- * end of region
- */
- if (reg->base + mem_size < end)
- memblock_remove(reg->base + mem_size,
- end - reg->base - mem_size);
- }
+ for_each_mem_range(i, &start, &end) {
+ phys_addr_t size = end - start;
+ if (!mem_start)
+ mem_start = start;
+ if (start <= vmlinux_start && vmlinux_end <= end)
+ BUG_ON(size == 0);
}
- BUG_ON(mem_size == 0);
+
+ /*
+ * The maximal physical memory size is -PAGE_OFFSET.
+ * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed
+ * as it is unusable by kernel.
+ */
+ memblock_enforce_memory_limit(-PAGE_OFFSET);
/* Reserve from the start of the kernel to the end of the kernel */
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
- max_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ dram_end = memblock_end_of_DRAM();
+
+ /*
+ * memblock allocator is not aware of the fact that last 4K bytes of
+ * the addressable memory can not be mapped because of IS_ERR_VALUE
+ * macro. Make sure that last 4k bytes are not usable by memblock
+ * if end of dram is equal to maximum addressable memory.
+ */
+ if (max_mapped_addr == (dram_end - 1))
+ memblock_set_current_limit(max_mapped_addr - 4096);
+
+ max_pfn = PFN_DOWN(dram_end);
max_low_pfn = max_pfn;
set_max_mapnr(max_low_pfn);
@@ -132,29 +208,19 @@
early_init_fdt_scan_reserved_mem();
memblock_allow_resize();
memblock_dump_all();
-
- for_each_memblock(memory, reg) {
- unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
- unsigned long end_pfn = memblock_region_memory_end_pfn(reg);
-
- memblock_set_node(PFN_PHYS(start_pfn),
- PFN_PHYS(end_pfn - start_pfn),
- &memblock.memory, 0);
- }
}
+#ifdef CONFIG_MMU
+static struct pt_alloc_ops pt_ops;
+
unsigned long va_pa_offset;
EXPORT_SYMBOL(va_pa_offset);
unsigned long pfn_base;
EXPORT_SYMBOL(pfn_base);
-void *dtb_early_va;
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
-static bool mmu_enabled;
-
-#define MAX_EARLY_MAPPING_SIZE SZ_128M
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
@@ -174,71 +240,101 @@
local_flush_tlb_page(addr);
}
-static pte_t *__init get_pte_virt(phys_addr_t pa)
+static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
{
- if (mmu_enabled) {
- clear_fixmap(FIX_PTE);
- return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
- } else {
- return (pte_t *)((uintptr_t)pa);
- }
+ return (pte_t *)((uintptr_t)pa);
}
-static phys_addr_t __init alloc_pte(uintptr_t va)
+static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_PTE);
+ return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
+}
+
+static inline pte_t *get_pte_virt_late(phys_addr_t pa)
+{
+ return (pte_t *) __va(pa);
+}
+
+static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
{
/*
* We only create PMD or PGD early mappings so we
* should never reach here with MMU disabled.
*/
- BUG_ON(!mmu_enabled);
+ BUG();
+}
+static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
+{
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}
+static phys_addr_t alloc_pte_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)))
+ BUG();
+ return __pa(vaddr);
+}
+
static void __init create_pte_mapping(pte_t *ptep,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
{
- uintptr_t pte_index = pte_index(va);
+ uintptr_t pte_idx = pte_index(va);
BUG_ON(sz != PAGE_SIZE);
- if (pte_none(ptep[pte_index]))
- ptep[pte_index] = pfn_pte(PFN_DOWN(pa), prot);
+ if (pte_none(ptep[pte_idx]))
+ ptep[pte_idx] = pfn_pte(PFN_DOWN(pa), prot);
}
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
+pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
+pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
-#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE
-#define NUM_EARLY_PMDS 1UL
-#else
-#define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE)
-#endif
-pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
-
-static pmd_t *__init get_pmd_virt(phys_addr_t pa)
+static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
- if (mmu_enabled) {
- clear_fixmap(FIX_PMD);
- return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
- } else {
- return (pmd_t *)((uintptr_t)pa);
- }
+ /* Before MMU is enabled */
+ return (pmd_t *)((uintptr_t)pa);
}
-static phys_addr_t __init alloc_pmd(uintptr_t va)
+static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
{
- uintptr_t pmd_num;
+ clear_fixmap(FIX_PMD);
+ return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
+}
- if (mmu_enabled)
- return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+static pmd_t *get_pmd_virt_late(phys_addr_t pa)
+{
+ return (pmd_t *) __va(pa);
+}
- pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
- BUG_ON(pmd_num >= NUM_EARLY_PMDS);
- return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
+static phys_addr_t __init alloc_pmd_early(uintptr_t va)
+{
+ BUG_ON((va - PAGE_OFFSET) >> PGDIR_SHIFT);
+
+ return (uintptr_t)early_pmd;
+}
+
+static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
+{
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_pmd_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr);
+ return __pa(vaddr);
}
static void __init create_pmd_mapping(pmd_t *pmdp,
@@ -247,65 +343,63 @@
{
pte_t *ptep;
phys_addr_t pte_phys;
- uintptr_t pmd_index = pmd_index(va);
+ uintptr_t pmd_idx = pmd_index(va);
if (sz == PMD_SIZE) {
- if (pmd_none(pmdp[pmd_index]))
- pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pa), prot);
+ if (pmd_none(pmdp[pmd_idx]))
+ pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pa), prot);
return;
}
- if (pmd_none(pmdp[pmd_index])) {
- pte_phys = alloc_pte(va);
- pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
- ptep = get_pte_virt(pte_phys);
+ if (pmd_none(pmdp[pmd_idx])) {
+ pte_phys = pt_ops.alloc_pte(va);
+ pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
+ ptep = pt_ops.get_pte_virt(pte_phys);
memset(ptep, 0, PAGE_SIZE);
} else {
- pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_index]));
- ptep = get_pte_virt(pte_phys);
+ pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
+ ptep = pt_ops.get_pte_virt(pte_phys);
}
create_pte_mapping(ptep, va, pa, sz, prot);
}
#define pgd_next_t pmd_t
-#define alloc_pgd_next(__va) alloc_pmd(__va)
-#define get_pgd_next_virt(__pa) get_pmd_virt(__pa)
+#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va)
+#define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
-#define PTE_PARENT_SIZE PMD_SIZE
#define fixmap_pgd_next fixmap_pmd
#else
#define pgd_next_t pte_t
-#define alloc_pgd_next(__va) alloc_pte(__va)
-#define get_pgd_next_virt(__pa) get_pte_virt(__pa)
+#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
+#define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
-#define PTE_PARENT_SIZE PGDIR_SIZE
#define fixmap_pgd_next fixmap_pte
#endif
-static void __init create_pgd_mapping(pgd_t *pgdp,
+void __init create_pgd_mapping(pgd_t *pgdp,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
{
pgd_next_t *nextp;
phys_addr_t next_phys;
- uintptr_t pgd_index = pgd_index(va);
+ uintptr_t pgd_idx = pgd_index(va);
if (sz == PGDIR_SIZE) {
- if (pgd_val(pgdp[pgd_index]) == 0)
- pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pa), prot);
+ if (pgd_val(pgdp[pgd_idx]) == 0)
+ pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(pa), prot);
return;
}
- if (pgd_val(pgdp[pgd_index]) == 0) {
+ if (pgd_val(pgdp[pgd_idx]) == 0) {
next_phys = alloc_pgd_next(va);
- pgdp[pgd_index] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE);
+ pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE);
nextp = get_pgd_next_virt(next_phys);
memset(nextp, 0, PAGE_SIZE);
} else {
- next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_index]));
+ next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_idx]));
nextp = get_pgd_next_virt(next_phys);
}
@@ -314,14 +408,11 @@
static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
{
- uintptr_t map_size = PAGE_SIZE;
+ /* Upgrade to PMD_SIZE mappings whenever possible */
+ if ((base & (PMD_SIZE - 1)) || (size & (PMD_SIZE - 1)))
+ return PAGE_SIZE;
- /* Upgrade to PMD/PGDIR mappings whenever possible */
- if (!(base & (PTE_PARENT_SIZE - 1)) &&
- !(size & (PTE_PARENT_SIZE - 1)))
- map_size = PTE_PARENT_SIZE;
-
- return map_size;
+ return PMD_SIZE;
}
/*
@@ -344,10 +435,13 @@
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
- uintptr_t va, end_va;
+ uintptr_t va, pa, end_va;
uintptr_t load_pa = (uintptr_t)(&_start);
uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
- uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
+ uintptr_t map_size;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pmd_t fix_bmap_spmd, fix_bmap_epmd;
+#endif
va_pa_offset = PAGE_OFFSET - load_pa;
pfn_base = PFN_DOWN(load_pa);
@@ -356,13 +450,18 @@
* Enforce boot alignment requirements of RV32 and
* RV64 by only allowing PMD or PGD mappings.
*/
- BUG_ON(map_size == PAGE_SIZE);
+ map_size = PMD_SIZE;
/* Sanity check alignment and size */
BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
BUG_ON((load_pa % map_size) != 0);
- BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE);
+ pt_ops.alloc_pte = alloc_pte_early;
+ pt_ops.get_pte_virt = get_pte_virt_early;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_early;
+ pt_ops.get_pmd_virt = get_pmd_virt_early;
+#endif
/* Setup early PGD for fixmap */
create_pgd_mapping(early_pg_dir, FIXADDR_START,
(uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
@@ -393,42 +492,84 @@
load_pa + (va - PAGE_OFFSET),
map_size, PAGE_KERNEL_EXEC);
- /* Create fixed mapping for early FDT parsing */
- end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
- for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE)
- create_pte_mapping(fixmap_pte, va,
- dtb_pa + (va - __fix_to_virt(FIX_FDT)),
- PAGE_SIZE, PAGE_KERNEL);
-
- /* Save pointer to DTB for early FDT parsing */
- dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK);
- /* Save physical address for memblock reservation */
+#ifndef __PAGETABLE_PMD_FOLDED
+ /* Setup early PMD for DTB */
+ create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+ (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE);
+ /* Create two consecutive PMD mappings for FDT early scan */
+ pa = dtb_pa & ~(PMD_SIZE - 1);
+ create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
+ pa, PMD_SIZE, PAGE_KERNEL);
+ create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
+ pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
+ dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
+#else
+ /* Create two consecutive PGD mappings for FDT early scan */
+ pa = dtb_pa & ~(PGDIR_SIZE - 1);
+ create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+ pa, PGDIR_SIZE, PAGE_KERNEL);
+ create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
+ pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
+ dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
+#endif
dtb_early_pa = dtb_pa;
+
+ /*
+ * Boot-time fixmap can only handle PMD_SIZE mappings. Thus, the boot-ioremap
+ * range cannot span multiple pmds.
+ */
+ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+#ifndef __PAGETABLE_PMD_FOLDED
+ /*
+ * Early ioremap fixmap is already created as it lies within first 2MB
+ * of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END and
+ * FIX_BTMAP_BEGIN should lie in the same pmd. Verify that and warn
+ * the user if not.
+ */
+ fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
+ fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
+ if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
+ WARN_ON(1);
+ pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
+ pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
+ pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+ fix_to_virt(FIX_BTMAP_BEGIN));
+ pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
+ fix_to_virt(FIX_BTMAP_END));
+
+ pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
+ pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
+ }
+#endif
}
static void __init setup_vm_final(void)
{
uintptr_t va, map_size;
phys_addr_t pa, start, end;
- struct memblock_region *reg;
+ u64 i;
- /* Set mmu_enabled flag */
- mmu_enabled = true;
-
+ /*
+ * MMU is enabled at this point, but page table setup is not complete yet.
+ * The fixmap page table alloc functions should be used at this point.
+ */
+ pt_ops.alloc_pte = alloc_pte_fixmap;
+ pt_ops.get_pte_virt = get_pte_virt_fixmap;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_fixmap;
+ pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
+#endif
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
- __pa(fixmap_pgd_next),
+ __pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
/* Map all memory banks */
- for_each_memblock(memory, reg) {
- start = reg->base;
- end = start + reg->size;
-
+ for_each_mem_range(i, &start, &end) {
if (start >= end)
break;
- if (memblock_is_nomap(reg))
- continue;
if (start <= __pa(PAGE_OFFSET) &&
__pa(PAGE_OFFSET) < end)
start = __pa(PAGE_OFFSET);
@@ -446,23 +587,94 @@
clear_fixmap(FIX_PMD);
/* Move to swapper page table */
- csr_write(CSR_SATP, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE);
+ csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
+
+ /* generic page allocation functions must be used to setup page table */
+ pt_ops.alloc_pte = alloc_pte_late;
+ pt_ops.get_pte_virt = get_pte_virt_late;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_late;
+ pt_ops.get_pmd_virt = get_pmd_virt_late;
+#endif
+}
+#else
+asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+{
+#ifdef CONFIG_BUILTIN_DTB
+ dtb_early_va = soc_lookup_builtin_dtb();
+ if (!dtb_early_va) {
+ /* Fallback to first available DTS */
+ dtb_early_va = (void *) __dtb_start;
+ }
+#else
+ dtb_early_va = (void *)dtb_pa;
+#endif
+ dtb_early_pa = dtb_pa;
+}
+
+static inline void setup_vm_final(void)
+{
+}
+#endif /* CONFIG_MMU */
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mark_rodata_ro(void)
+{
+ unsigned long text_start = (unsigned long)_text;
+ unsigned long text_end = (unsigned long)_etext;
+ unsigned long rodata_start = (unsigned long)__start_rodata;
+ unsigned long data_start = (unsigned long)_data;
+ unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
+
+ set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
+ set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+ set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+ set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
+
+ debug_checkwx();
+}
+#endif
+
+static void __init resource_init(void)
+{
+ struct memblock_region *region;
+
+ for_each_mem_region(region) {
+ struct resource *res;
+
+ res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
+ if (!res)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(struct resource));
+
+ if (memblock_is_nomap(region)) {
+ res->name = "reserved";
+ res->flags = IORESOURCE_MEM;
+ } else {
+ res->name = "System RAM";
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ }
+ res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
+ res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+
+ request_resource(&iomem_resource, res);
+ }
}
void __init paging_init(void)
{
setup_vm_final();
- memblocks_present();
sparse_init();
setup_zero_page();
zone_sizes_init();
+ resource_init();
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
- return vmemmap_populate_basepages(start, end, node);
+ return vmemmap_populate_basepages(start, end, node, NULL);
}
#endif
diff --git a/arch/riscv/mm/ioremap.c b/arch/riscv/mm/ioremap.c
deleted file mode 100644
index ac621dd..0000000
--- a/arch/riscv/mm/ioremap.c
+++ /dev/null
@@ -1,84 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * (C) Copyright 1995 1996 Linus Torvalds
- * (C) Copyright 2012 Regents of the University of California
- */
-
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-#include <linux/io.h>
-
-#include <asm/pgtable.h>
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-static void __iomem *__ioremap_caller(phys_addr_t addr, size_t size,
- pgprot_t prot, void *caller)
-{
- phys_addr_t last_addr;
- unsigned long offset, vaddr;
- struct vm_struct *area;
-
- /* Disallow wrap-around or zero size */
- last_addr = addr + size - 1;
- if (!size || last_addr < addr)
- return NULL;
-
- /* Page-align mappings */
- offset = addr & (~PAGE_MASK);
- addr -= offset;
- size = PAGE_ALIGN(size + offset);
-
- area = get_vm_area_caller(size, VM_IOREMAP, caller);
- if (!area)
- return NULL;
- vaddr = (unsigned long)area->addr;
-
- if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) {
- free_vm_area(area);
- return NULL;
- }
-
- return (void __iomem *)(vaddr + offset);
-}
-
-/*
- * ioremap - map bus memory into CPU space
- * @offset: bus address of the memory
- * @size: size of the resource to map
- *
- * ioremap performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * Must be freed with iounmap.
- */
-void __iomem *ioremap(phys_addr_t offset, unsigned long size)
-{
- return __ioremap_caller(offset, size, PAGE_KERNEL,
- __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap);
-
-
-/**
- * iounmap - Free a IO remapping
- * @addr: virtual address from ioremap_*
- *
- * Caller must ensure there is only one unmapping for the same pointer.
- */
-void iounmap(volatile void __iomem *addr)
-{
- vunmap((void *)((unsigned long)addr & PAGE_MASK));
-}
-EXPORT_SYMBOL(iounmap);
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
new file mode 100644
index 0000000..2db4427
--- /dev/null
+++ b/arch/riscv/mm/kasan_init.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Andes Technology Corporation
+
+#include <linux/pfn.h>
+#include <linux/init_task.h>
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/fixmap.h>
+
+extern pgd_t early_pg_dir[PTRS_PER_PGD];
+asmlinkage void __init kasan_early_init(void)
+{
+ uintptr_t i;
+ pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START);
+
+ BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
+ KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
+
+ for (i = 0; i < PTRS_PER_PTE; ++i)
+ set_pte(kasan_early_shadow_pte + i,
+ pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL));
+
+ for (i = 0; i < PTRS_PER_PMD; ++i)
+ set_pmd(kasan_early_shadow_pmd + i,
+ pfn_pmd(PFN_DOWN
+ (__pa((uintptr_t) kasan_early_shadow_pte)),
+ __pgprot(_PAGE_TABLE)));
+
+ for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
+ i += PGDIR_SIZE, ++pgd)
+ set_pgd(pgd,
+ pfn_pgd(PFN_DOWN
+ (__pa(((uintptr_t) kasan_early_shadow_pmd))),
+ __pgprot(_PAGE_TABLE)));
+
+ /* init for swapper_pg_dir */
+ pgd = pgd_offset_k(KASAN_SHADOW_START);
+
+ for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
+ i += PGDIR_SIZE, ++pgd)
+ set_pgd(pgd,
+ pfn_pgd(PFN_DOWN
+ (__pa(((uintptr_t) kasan_early_shadow_pmd))),
+ __pgprot(_PAGE_TABLE)));
+
+ local_flush_tlb_all();
+}
+
+static void __init populate(void *start, void *end)
+{
+ unsigned long i, offset;
+ unsigned long vaddr = (unsigned long)start & PAGE_MASK;
+ unsigned long vend = PAGE_ALIGN((unsigned long)end);
+ unsigned long n_pages = (vend - vaddr) / PAGE_SIZE;
+ unsigned long n_ptes =
+ ((n_pages + PTRS_PER_PTE) & -PTRS_PER_PTE) / PTRS_PER_PTE;
+ unsigned long n_pmds =
+ ((n_ptes + PTRS_PER_PMD) & -PTRS_PER_PMD) / PTRS_PER_PMD;
+
+ pte_t *pte =
+ memblock_alloc(n_ptes * PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
+ pmd_t *pmd =
+ memblock_alloc(n_pmds * PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+ pgd_t *pgd = pgd_offset_k(vaddr);
+
+ for (i = 0; i < n_pages; i++) {
+ phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_pte(&pte[i], pfn_pte(PHYS_PFN(phys), PAGE_KERNEL));
+ }
+
+ for (i = 0, offset = 0; i < n_ptes; i++, offset += PTRS_PER_PTE)
+ set_pmd(&pmd[i],
+ pfn_pmd(PFN_DOWN(__pa(&pte[offset])),
+ __pgprot(_PAGE_TABLE)));
+
+ for (i = 0, offset = 0; i < n_pmds; i++, offset += PTRS_PER_PMD)
+ set_pgd(&pgd[i],
+ pfn_pgd(PFN_DOWN(__pa(&pmd[offset])),
+ __pgprot(_PAGE_TABLE)));
+
+ local_flush_tlb_all();
+ memset(start, 0, end - start);
+}
+
+void __init kasan_init(void)
+{
+ phys_addr_t _start, _end;
+ u64 i;
+
+ kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
+ (void *)kasan_mem_to_shadow((void *)
+ VMALLOC_END));
+
+ for_each_mem_range(i, &_start, &_end) {
+ void *start = (void *)__va(_start);
+ void *end = (void *)__va(_end);
+
+ if (start >= end)
+ break;
+
+ populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
+ };
+
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(&kasan_early_shadow_pte[i],
+ mk_pte(virt_to_page(kasan_early_shadow_page),
+ __pgprot(_PAGE_PRESENT | _PAGE_READ |
+ _PAGE_ACCESSED)));
+
+ memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+ init_task.kasan_depth = 0;
+}
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
new file mode 100644
index 0000000..19fecb3
--- /dev/null
+++ b/arch/riscv/mm/pageattr.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019 SiFive
+ */
+
+#include <linux/pagewalk.h>
+#include <linux/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/bitops.h>
+#include <asm/set_memory.h>
+
+struct pageattr_masks {
+ pgprot_t set_mask;
+ pgprot_t clear_mask;
+};
+
+static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk)
+{
+ struct pageattr_masks *masks = walk->private;
+ unsigned long new_val = val;
+
+ new_val &= ~(pgprot_val(masks->clear_mask));
+ new_val |= (pgprot_val(masks->set_mask));
+
+ return new_val;
+}
+
+static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pgd_t val = READ_ONCE(*pgd);
+
+ if (pgd_leaf(val)) {
+ val = __pgd(set_pageattr_masks(pgd_val(val), walk));
+ set_pgd(pgd, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ p4d_t val = READ_ONCE(*p4d);
+
+ if (p4d_leaf(val)) {
+ val = __p4d(set_pageattr_masks(p4d_val(val), walk));
+ set_p4d(p4d, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pud_t val = READ_ONCE(*pud);
+
+ if (pud_leaf(val)) {
+ val = __pud(set_pageattr_masks(pud_val(val), walk));
+ set_pud(pud, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pmd_t val = READ_ONCE(*pmd);
+
+ if (pmd_leaf(val)) {
+ val = __pmd(set_pageattr_masks(pmd_val(val), walk));
+ set_pmd(pmd, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t val = READ_ONCE(*pte);
+
+ val = __pte(set_pageattr_masks(pte_val(val), walk));
+ set_pte(pte, val);
+
+ return 0;
+}
+
+static int pageattr_pte_hole(unsigned long addr, unsigned long next,
+ int depth, struct mm_walk *walk)
+{
+ /* Nothing to do here */
+ return 0;
+}
+
+static const struct mm_walk_ops pageattr_ops = {
+ .pgd_entry = pageattr_pgd_entry,
+ .p4d_entry = pageattr_p4d_entry,
+ .pud_entry = pageattr_pud_entry,
+ .pmd_entry = pageattr_pmd_entry,
+ .pte_entry = pageattr_pte_entry,
+ .pte_hole = pageattr_pte_hole,
+};
+
+static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
+ pgprot_t clear_mask)
+{
+ int ret;
+ unsigned long start = addr;
+ unsigned long end = start + PAGE_SIZE * numpages;
+ struct pageattr_masks masks = {
+ .set_mask = set_mask,
+ .clear_mask = clear_mask
+ };
+
+ if (!numpages)
+ return 0;
+
+ mmap_read_lock(&init_mm);
+ ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
+ &masks);
+ mmap_read_unlock(&init_mm);
+
+ flush_tlb_kernel_range(start, end);
+
+ return ret;
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
+ __pgprot(_PAGE_WRITE));
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(_PAGE_READ | _PAGE_WRITE),
+ __pgprot(0));
+}
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(_PAGE_EXEC), __pgprot(0));
+}
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_EXEC));
+}
+
+int set_direct_map_invalid_noflush(struct page *page)
+{
+ int ret;
+ unsigned long start = (unsigned long)page_address(page);
+ unsigned long end = start + PAGE_SIZE;
+ struct pageattr_masks masks = {
+ .set_mask = __pgprot(0),
+ .clear_mask = __pgprot(_PAGE_PRESENT)
+ };
+
+ mmap_read_lock(&init_mm);
+ ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
+ mmap_read_unlock(&init_mm);
+
+ return ret;
+}
+
+int set_direct_map_default_noflush(struct page *page)
+{
+ int ret;
+ unsigned long start = (unsigned long)page_address(page);
+ unsigned long end = start + PAGE_SIZE;
+ struct pageattr_masks masks = {
+ .set_mask = PAGE_KERNEL,
+ .clear_mask = __pgprot(0)
+ };
+
+ mmap_read_lock(&init_mm);
+ ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
+ mmap_read_unlock(&init_mm);
+
+ return ret;
+}
+
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ if (!debug_pagealloc_enabled())
+ return;
+
+ if (enable)
+ __set_memory((unsigned long)page_address(page), numpages,
+ __pgprot(_PAGE_PRESENT), __pgprot(0));
+ else
+ __set_memory((unsigned long)page_address(page), numpages,
+ __pgprot(0), __pgprot(_PAGE_PRESENT));
+}
diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
new file mode 100644
index 0000000..e8e4dcd
--- /dev/null
+++ b/arch/riscv/mm/physaddr.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+#include <linux/mmdebug.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+
+phys_addr_t __virt_to_phys(unsigned long x)
+{
+ phys_addr_t y = x - PAGE_OFFSET;
+
+ /*
+ * Boundary checking against the kernel linear mapping space.
+ */
+ WARN(y >= KERN_VIRT_SIZE,
+ "virt_to_phys used for non-linear address: %pK (%pS)\n",
+ (void *)x, (void *)x);
+
+ return __va_to_pa_nodebug(x);
+}
+EXPORT_SYMBOL(__virt_to_phys);
+
+phys_addr_t __phys_addr_symbol(unsigned long x)
+{
+ unsigned long kernel_start = (unsigned long)PAGE_OFFSET;
+ unsigned long kernel_end = (unsigned long)_end;
+
+ /*
+ * Boundary checking against the kernel image mapping.
+ * __pa_symbol should only be used on kernel symbol addresses.
+ */
+ VIRTUAL_BUG_ON(x < kernel_start || x > kernel_end);
+
+ return __va_to_pa_nodebug(x);
+}
+EXPORT_SYMBOL(__phys_addr_symbol);
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
new file mode 100644
index 0000000..ace74de
--- /dev/null
+++ b/arch/riscv/mm/ptdump.c
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019 SiFive
+ */
+
+#include <linux/efi.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/ptdump.h>
+
+#include <asm/ptdump.h>
+#include <linux/pgtable.h>
+#include <asm/kasan.h>
+
+#define pt_dump_seq_printf(m, fmt, args...) \
+({ \
+ if (m) \
+ seq_printf(m, fmt, ##args); \
+})
+
+#define pt_dump_seq_puts(m, fmt) \
+({ \
+ if (m) \
+ seq_printf(m, fmt); \
+})
+
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
+struct pg_state {
+ struct ptdump_state ptdump;
+ struct seq_file *seq;
+ const struct addr_marker *marker;
+ unsigned long start_address;
+ unsigned long start_pa;
+ unsigned long last_pa;
+ int level;
+ u64 current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
+};
+
+/* Address marker */
+struct addr_marker {
+ unsigned long start_address;
+ const char *name;
+};
+
+/* Private information for debugfs */
+struct ptd_mm_info {
+ struct mm_struct *mm;
+ const struct addr_marker *markers;
+ unsigned long base_addr;
+ unsigned long end;
+};
+
+static struct addr_marker address_markers[] = {
+#ifdef CONFIG_KASAN
+ {KASAN_SHADOW_START, "Kasan shadow start"},
+ {KASAN_SHADOW_END, "Kasan shadow end"},
+#endif
+ {FIXADDR_START, "Fixmap start"},
+ {FIXADDR_TOP, "Fixmap end"},
+ {PCI_IO_START, "PCI I/O start"},
+ {PCI_IO_END, "PCI I/O end"},
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ {VMEMMAP_START, "vmemmap start"},
+ {VMEMMAP_END, "vmemmap end"},
+#endif
+ {VMALLOC_START, "vmalloc() area"},
+ {VMALLOC_END, "vmalloc() end"},
+ {PAGE_OFFSET, "Linear mapping"},
+ {-1, NULL},
+};
+
+static struct ptd_mm_info kernel_ptd_info = {
+ .mm = &init_mm,
+ .markers = address_markers,
+ .base_addr = KERN_VIRT_START,
+ .end = ULONG_MAX,
+};
+
+#ifdef CONFIG_EFI
+static struct addr_marker efi_addr_markers[] = {
+ { 0, "UEFI runtime start" },
+ { SZ_1G, "UEFI runtime end" },
+ { -1, NULL }
+};
+
+static struct ptd_mm_info efi_ptd_info = {
+ .mm = &efi_mm,
+ .markers = efi_addr_markers,
+ .base_addr = 0,
+ .end = SZ_2G,
+};
+#endif
+
+/* Page Table Entry */
+struct prot_bits {
+ u64 mask;
+ u64 val;
+ const char *set;
+ const char *clear;
+};
+
+static const struct prot_bits pte_bits[] = {
+ {
+ .mask = _PAGE_SOFT,
+ .val = _PAGE_SOFT,
+ .set = "RSW",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_DIRTY,
+ .val = _PAGE_DIRTY,
+ .set = "D",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_ACCESSED,
+ .val = _PAGE_ACCESSED,
+ .set = "A",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_GLOBAL,
+ .val = _PAGE_GLOBAL,
+ .set = "G",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_USER,
+ .val = _PAGE_USER,
+ .set = "U",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_EXEC,
+ .val = _PAGE_EXEC,
+ .set = "X",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_WRITE,
+ .val = _PAGE_WRITE,
+ .set = "W",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_READ,
+ .val = _PAGE_READ,
+ .set = "R",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_PRESENT,
+ .val = _PAGE_PRESENT,
+ .set = "V",
+ .clear = ".",
+ }
+};
+
+/* Page Level */
+struct pg_level {
+ const char *name;
+ u64 mask;
+};
+
+static struct pg_level pg_level[] = {
+ { /* pgd */
+ .name = "PGD",
+ }, { /* p4d */
+ .name = (CONFIG_PGTABLE_LEVELS > 4) ? "P4D" : "PGD",
+ }, { /* pud */
+ .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD",
+ }, { /* pmd */
+ .name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD",
+ }, { /* pte */
+ .name = "PTE",
+ },
+};
+
+static void dump_prot(struct pg_state *st)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(pte_bits); i++) {
+ const char *s;
+
+ if ((st->current_prot & pte_bits[i].mask) == pte_bits[i].val)
+ s = pte_bits[i].set;
+ else
+ s = pte_bits[i].clear;
+
+ if (s)
+ pt_dump_seq_printf(st->seq, " %s", s);
+ }
+}
+
+#ifdef CONFIG_64BIT
+#define ADDR_FORMAT "0x%016lx"
+#else
+#define ADDR_FORMAT "0x%08lx"
+#endif
+static void dump_addr(struct pg_state *st, unsigned long addr)
+{
+ static const char units[] = "KMGTPE";
+ const char *unit = units;
+ unsigned long delta;
+
+ pt_dump_seq_printf(st->seq, ADDR_FORMAT "-" ADDR_FORMAT " ",
+ st->start_address, addr);
+
+ pt_dump_seq_printf(st->seq, " " ADDR_FORMAT " ", st->start_pa);
+ delta = (addr - st->start_address) >> 10;
+
+ while (!(delta & 1023) && unit[1]) {
+ delta >>= 10;
+ unit++;
+ }
+
+ pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
+ pg_level[st->level].name);
+}
+
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
+{
+ if (!st->check_wx)
+ return;
+
+ if ((st->current_prot & (_PAGE_WRITE | _PAGE_EXEC)) !=
+ (_PAGE_WRITE | _PAGE_EXEC))
+ return;
+
+ WARN_ONCE(1, "riscv/mm: Found insecure W+X mapping at address %p/%pS\n",
+ (void *)st->start_address, (void *)st->start_address);
+
+ st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+}
+
+static void note_page(struct ptdump_state *pt_st, unsigned long addr,
+ int level, u64 val)
+{
+ struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+ u64 pa = PFN_PHYS(pte_pfn(__pte(val)));
+ u64 prot = 0;
+
+ if (level >= 0)
+ prot = val & pg_level[level].mask;
+
+ if (st->level == -1) {
+ st->level = level;
+ st->current_prot = prot;
+ st->start_address = addr;
+ st->start_pa = pa;
+ st->last_pa = pa;
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ } else if (prot != st->current_prot ||
+ level != st->level || addr >= st->marker[1].start_address) {
+ if (st->current_prot) {
+ note_prot_wx(st, addr);
+ dump_addr(st, addr);
+ dump_prot(st);
+ pt_dump_seq_puts(st->seq, "\n");
+ }
+
+ while (addr >= st->marker[1].start_address) {
+ st->marker++;
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n",
+ st->marker->name);
+ }
+
+ st->start_address = addr;
+ st->start_pa = pa;
+ st->last_pa = pa;
+ st->current_prot = prot;
+ st->level = level;
+ } else {
+ st->last_pa = pa;
+ }
+}
+
+static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
+{
+ struct pg_state st = {
+ .seq = s,
+ .marker = pinfo->markers,
+ .level = -1,
+ .ptdump = {
+ .note_page = note_page,
+ .range = (struct ptdump_range[]) {
+ {pinfo->base_addr, pinfo->end},
+ {0, 0}
+ }
+ }
+ };
+
+ ptdump_walk_pgd(&st.ptdump, pinfo->mm, NULL);
+}
+
+void ptdump_check_wx(void)
+{
+ struct pg_state st = {
+ .seq = NULL,
+ .marker = (struct addr_marker[]) {
+ {0, NULL},
+ {-1, NULL},
+ },
+ .level = -1,
+ .check_wx = true,
+ .ptdump = {
+ .note_page = note_page,
+ .range = (struct ptdump_range[]) {
+ {KERN_VIRT_START, ULONG_MAX},
+ {0, 0}
+ }
+ }
+ };
+
+ ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+
+ if (st.wx_pages)
+ pr_warn("Checked W+X mappings: failed, %lu W+X pages found\n",
+ st.wx_pages);
+ else
+ pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+}
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ ptdump_walk(m, m->private);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(ptdump);
+
+static int ptdump_init(void)
+{
+ unsigned int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(pg_level); i++)
+ for (j = 0; j < ARRAY_SIZE(pte_bits); j++)
+ pg_level[i].mask |= pte_bits[j].mask;
+
+ debugfs_create_file("kernel_page_tables", 0400, NULL, &kernel_ptd_info,
+ &ptdump_fops);
+#ifdef CONFIG_EFI
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
+ debugfs_create_file("efi_page_tables", 0400, NULL, &efi_ptd_info,
+ &ptdump_fops);
+#endif
+
+ return 0;
+}
+
+device_initcall(ptdump_init);
diff --git a/arch/riscv/mm/sifive_l2_cache.c b/arch/riscv/mm/sifive_l2_cache.c
deleted file mode 100644
index a9ffff3..0000000
--- a/arch/riscv/mm/sifive_l2_cache.c
+++ /dev/null
@@ -1,178 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * SiFive L2 cache controller Driver
- *
- * Copyright (C) 2018-2019 SiFive, Inc.
- *
- */
-#include <linux/debugfs.h>
-#include <linux/interrupt.h>
-#include <linux/of_irq.h>
-#include <linux/of_address.h>
-#include <asm/sifive_l2_cache.h>
-
-#define SIFIVE_L2_DIRECCFIX_LOW 0x100
-#define SIFIVE_L2_DIRECCFIX_HIGH 0x104
-#define SIFIVE_L2_DIRECCFIX_COUNT 0x108
-
-#define SIFIVE_L2_DATECCFIX_LOW 0x140
-#define SIFIVE_L2_DATECCFIX_HIGH 0x144
-#define SIFIVE_L2_DATECCFIX_COUNT 0x148
-
-#define SIFIVE_L2_DATECCFAIL_LOW 0x160
-#define SIFIVE_L2_DATECCFAIL_HIGH 0x164
-#define SIFIVE_L2_DATECCFAIL_COUNT 0x168
-
-#define SIFIVE_L2_CONFIG 0x00
-#define SIFIVE_L2_WAYENABLE 0x08
-#define SIFIVE_L2_ECCINJECTERR 0x40
-
-#define SIFIVE_L2_MAX_ECCINTR 3
-
-static void __iomem *l2_base;
-static int g_irq[SIFIVE_L2_MAX_ECCINTR];
-
-enum {
- DIR_CORR = 0,
- DATA_CORR,
- DATA_UNCORR,
-};
-
-#ifdef CONFIG_DEBUG_FS
-static struct dentry *sifive_test;
-
-static ssize_t l2_write(struct file *file, const char __user *data,
- size_t count, loff_t *ppos)
-{
- unsigned int val;
-
- if (kstrtouint_from_user(data, count, 0, &val))
- return -EINVAL;
- if ((val >= 0 && val < 0xFF) || (val >= 0x10000 && val < 0x100FF))
- writel(val, l2_base + SIFIVE_L2_ECCINJECTERR);
- else
- return -EINVAL;
- return count;
-}
-
-static const struct file_operations l2_fops = {
- .owner = THIS_MODULE,
- .open = simple_open,
- .write = l2_write
-};
-
-static void setup_sifive_debug(void)
-{
- sifive_test = debugfs_create_dir("sifive_l2_cache", NULL);
-
- debugfs_create_file("sifive_debug_inject_error", 0200,
- sifive_test, NULL, &l2_fops);
-}
-#endif
-
-static void l2_config_read(void)
-{
- u32 regval, val;
-
- regval = readl(l2_base + SIFIVE_L2_CONFIG);
- val = regval & 0xFF;
- pr_info("L2CACHE: No. of Banks in the cache: %d\n", val);
- val = (regval & 0xFF00) >> 8;
- pr_info("L2CACHE: No. of ways per bank: %d\n", val);
- val = (regval & 0xFF0000) >> 16;
- pr_info("L2CACHE: Sets per bank: %llu\n", (uint64_t)1 << val);
- val = (regval & 0xFF000000) >> 24;
- pr_info("L2CACHE: Bytes per cache block: %llu\n", (uint64_t)1 << val);
-
- regval = readl(l2_base + SIFIVE_L2_WAYENABLE);
- pr_info("L2CACHE: Index of the largest way enabled: %d\n", regval);
-}
-
-static const struct of_device_id sifive_l2_ids[] = {
- { .compatible = "sifive,fu540-c000-ccache" },
- { /* end of table */ },
-};
-
-static ATOMIC_NOTIFIER_HEAD(l2_err_chain);
-
-int register_sifive_l2_error_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_register(&l2_err_chain, nb);
-}
-EXPORT_SYMBOL_GPL(register_sifive_l2_error_notifier);
-
-int unregister_sifive_l2_error_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&l2_err_chain, nb);
-}
-EXPORT_SYMBOL_GPL(unregister_sifive_l2_error_notifier);
-
-static irqreturn_t l2_int_handler(int irq, void *device)
-{
- unsigned int add_h, add_l;
-
- if (irq == g_irq[DIR_CORR]) {
- add_h = readl(l2_base + SIFIVE_L2_DIRECCFIX_HIGH);
- add_l = readl(l2_base + SIFIVE_L2_DIRECCFIX_LOW);
- pr_err("L2CACHE: DirError @ 0x%08X.%08X\n", add_h, add_l);
- /* Reading this register clears the DirError interrupt sig */
- readl(l2_base + SIFIVE_L2_DIRECCFIX_COUNT);
- atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_CE,
- "DirECCFix");
- }
- if (irq == g_irq[DATA_CORR]) {
- add_h = readl(l2_base + SIFIVE_L2_DATECCFIX_HIGH);
- add_l = readl(l2_base + SIFIVE_L2_DATECCFIX_LOW);
- pr_err("L2CACHE: DataError @ 0x%08X.%08X\n", add_h, add_l);
- /* Reading this register clears the DataError interrupt sig */
- readl(l2_base + SIFIVE_L2_DATECCFIX_COUNT);
- atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_CE,
- "DatECCFix");
- }
- if (irq == g_irq[DATA_UNCORR]) {
- add_h = readl(l2_base + SIFIVE_L2_DATECCFAIL_HIGH);
- add_l = readl(l2_base + SIFIVE_L2_DATECCFAIL_LOW);
- pr_err("L2CACHE: DataFail @ 0x%08X.%08X\n", add_h, add_l);
- /* Reading this register clears the DataFail interrupt sig */
- readl(l2_base + SIFIVE_L2_DATECCFAIL_COUNT);
- atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_UE,
- "DatECCFail");
- }
-
- return IRQ_HANDLED;
-}
-
-static int __init sifive_l2_init(void)
-{
- struct device_node *np;
- struct resource res;
- int i, rc;
-
- np = of_find_matching_node(NULL, sifive_l2_ids);
- if (!np)
- return -ENODEV;
-
- if (of_address_to_resource(np, 0, &res))
- return -ENODEV;
-
- l2_base = ioremap(res.start, resource_size(&res));
- if (!l2_base)
- return -ENOMEM;
-
- for (i = 0; i < SIFIVE_L2_MAX_ECCINTR; i++) {
- g_irq[i] = irq_of_parse_and_map(np, i);
- rc = request_irq(g_irq[i], l2_int_handler, 0, "l2_ecc", NULL);
- if (rc) {
- pr_err("L2CACHE: Could not request IRQ %d\n", g_irq[i]);
- return rc;
- }
- }
-
- l2_config_read();
-
-#ifdef CONFIG_DEBUG_FS
- setup_sifive_debug();
-#endif
- return 0;
-}
-device_initcall(sifive_l2_init);
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 24cd33d..720b443 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -2,6 +2,7 @@
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/sched.h>
#include <asm/sbi.h>
void flush_tlb_all(void)
@@ -9,13 +10,33 @@
sbi_remote_sfence_vma(NULL, 0, -1);
}
+/*
+ * This function must not be called with cmask being null.
+ * Kernel may panic if cmask is NULL.
+ */
static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start,
unsigned long size)
{
struct cpumask hmask;
+ unsigned int cpuid;
- riscv_cpuid_to_hartid_mask(cmask, &hmask);
- sbi_remote_sfence_vma(hmask.bits, start, size);
+ if (cpumask_empty(cmask))
+ return;
+
+ cpuid = get_cpu();
+
+ if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) {
+ /* local cpu is the only cpu present in cpumask */
+ if (size <= PAGE_SIZE)
+ local_flush_tlb_page(start);
+ else
+ local_flush_tlb_all();
+ } else {
+ riscv_cpuid_to_hartid_mask(cmask, &hmask);
+ sbi_remote_sfence_vma(cpumask_bits(&hmask), start, size);
+ }
+
+ put_cpu();
}
void flush_tlb_mm(struct mm_struct *mm)