Update Linux to v5.4.148

Sourced from [1]

[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz

Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>

diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index edcdca9..4fa7a56 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -787,14 +787,18 @@
static inline unsigned long *gmap_table_walk(struct gmap *gmap,
unsigned long gaddr, int level)
{
+ const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
unsigned long *table;
if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
return NULL;
if (gmap_is_shadow(gmap) && gmap->removed)
return NULL;
- if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11)))
+
+ if (asce_type != _ASCE_TYPE_REGION1 &&
+ gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
return NULL;
+
table = gmap->table;
switch (gmap->asce & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
@@ -1840,6 +1844,7 @@
goto out_free;
} else if (*table & _REGION_ENTRY_ORIGIN) {
rc = -EAGAIN; /* Race with shadow */
+ goto out_free;
}
crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY);
/* mark as invalid as long as the parent table is not protected */
@@ -2480,23 +2485,36 @@
}
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
+
+ split_huge_pmd(vma, pmd, addr);
+ return 0;
+}
+
+static const struct mm_walk_ops thp_split_walk_ops = {
+ .pmd_entry = thp_split_walk_pmd_entry,
+};
+
static inline void thp_split_mm(struct mm_struct *mm)
{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct vm_area_struct *vma;
- unsigned long addr;
for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
- for (addr = vma->vm_start;
- addr < vma->vm_end;
- addr += PAGE_SIZE)
- follow_page(vma, addr, FOLL_SPLIT);
vma->vm_flags &= ~VM_HUGEPAGE;
vma->vm_flags |= VM_NOHUGEPAGE;
+ walk_page_vma(vma, &thp_split_walk_ops, NULL);
}
mm->def_flags |= VM_NOHUGEPAGE;
-#endif
}
+#else
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
* Remove all empty zero pages from the mapping for lazy refaulting
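
Note on the gmap_table_walk() hunk above: the old range check shifted -1UL by 31 + (asce_type >> 2) * 11 bits, which evaluates to 64 for a region-first-table ASCE and is therefore an undefined shift on a 64-bit value; the new code skips the check for _ASCE_TYPE_REGION1, where every 64-bit guest address is reachable anyway. The standalone sketch below works through the arithmetic; it assumes the usual s390 _ASCE_TYPE_* encodings (0x00/0x04/0x08/0x0c) and is an illustration, not kernel code.

/*
 * Standalone illustration of the gaddr range check in gmap_table_walk().
 * The ASCE_TYPE_* values mirror the s390 _ASCE_TYPE_* definitions;
 * everything else is illustrative only.
 */
#include <stdio.h>

#define ASCE_TYPE_SEGMENT 0x00  /* 2 GB addressable */
#define ASCE_TYPE_REGION3 0x04  /* 4 TB addressable */
#define ASCE_TYPE_REGION2 0x08  /* 8 PB addressable */
#define ASCE_TYPE_REGION1 0x0c  /* whole 64-bit space */

static int gaddr_out_of_range(int asce_type, unsigned long gaddr)
{
    /*
     * For REGION1 the shift would be 31 + 3 * 11 = 64, undefined for a
     * 64-bit type, so the kernel now skips the check entirely: every
     * 64-bit address is reachable through a region-first table.
     */
    if (asce_type == ASCE_TYPE_REGION1)
        return 0;
    return (gaddr & (-1UL << (31 + (asce_type >> 2) * 11))) != 0;
}

int main(void)
{
    const int types[] = { ASCE_TYPE_SEGMENT, ASCE_TYPE_REGION3,
                          ASCE_TYPE_REGION2, ASCE_TYPE_REGION1 };
    unsigned long gaddr = 1UL << 40;  /* 1 TB sample guest address */

    for (unsigned int i = 0; i < 4; i++)
        printf("type 0x%02x: shift %2d, gaddr 0x%lx %s\n",
               types[i], 31 + (types[i] >> 2) * 11, gaddr,
               gaddr_out_of_range(types[i], gaddr) ? "out of range" : "ok");
    return 0;
}
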
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index b0246c7..ff8234b 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -2,7 +2,7 @@
/*
* IBM System z Huge TLB Page Support for Kernel.
*
- * Copyright IBM Corp. 2007,2016
+ * Copyright IBM Corp. 2007,2020
* Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
@@ -11,6 +11,9 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
+#include <linux/mman.h>
+#include <linux/sched/mm.h>
+#include <linux/security.h>
/*
* If the bit selected by single-bit bitmask "a" is set within "x", move
@@ -114,7 +117,7 @@
_PAGE_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
- _PAGE_DIRTY);
+ _PAGE_SOFT_DIRTY);
#endif
pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC,
_PAGE_NOEXEC);
@@ -156,10 +159,13 @@
rste &= ~_SEGMENT_ENTRY_NOEXEC;
/* Set correct table type for 2G hugepages */
- if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
- rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
- else
+ if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
+ if (likely(pte_present(pte)))
+ rste |= _REGION3_ENTRY_LARGE;
+ rste |= _REGION_ENTRY_TYPE_R3;
+ } else if (likely(pte_present(pte)))
rste |= _SEGMENT_ENTRY_LARGE;
+
clear_huge_pte_skeys(mm, rste);
pte_val(*ptep) = rste;
}
@@ -267,3 +273,98 @@
return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
+
+static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = current->mm->mmap_base;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
+}
+
+static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
+ unsigned long addr0, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+ unsigned long addr;
+
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+ info.high_limit = current->mm->mmap_base;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ addr = vm_unmapped_area(&info);
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ if (addr & ~PAGE_MASK) {
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ addr = vm_unmapped_area(&info);
+ }
+
+ return addr;
+}
+
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc;
+
+ if (len & ~huge_page_mask(h))
+ return -EINVAL;
+ if (len > TASK_SIZE - mmap_min_addr)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED) {
+ if (prepare_hugepage_range(file, addr, len))
+ return -EINVAL;
+ goto check_asce_limit;
+ }
+
+ if (addr) {
+ addr = ALIGN(addr, huge_page_size(h));
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vm_start_gap(vma)))
+ goto check_asce_limit;
+ }
+
+ if (mm->get_unmapped_area == arch_get_unmapped_area)
+ addr = hugetlb_get_unmapped_area_bottomup(file, addr, len,
+ pgoff, flags);
+ else
+ addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
+ pgoff, flags);
+ if (addr & ~PAGE_MASK)
+ return addr;
+
+check_asce_limit:
+ if (addr + len > current->mm->context.asce_limit &&
+ addr + len <= TASK_SIZE) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+ return addr;
+}
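
Note on the new hugetlb_get_unmapped_area() above: candidate addresses are aligned to the huge page size either explicitly via ALIGN(addr, huge_page_size(h)) or by handing vm_unmapped_area() an align_mask of PAGE_MASK & ~huge_page_mask(h), i.e. exactly the bits between the base-page and huge-page boundaries that must be clear. Below is a minimal userspace sketch of that arithmetic; the 4 KB base page size and the s390 huge page sizes (1 MB and 2 GB) are hard-coded assumptions, and the sketch is not the kernel code path.

/* Illustration of the alignment helpers used by hugetlb_get_unmapped_area(). */
#include <stdio.h>

#define PAGE_SHIFT  12
#define PAGE_SIZE   (1UL << PAGE_SHIFT)
#define PAGE_MASK   (~(PAGE_SIZE - 1))

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
    unsigned long huge_sizes[] = { 1UL << 20, 1UL << 31 }; /* 1 MB, 2 GB */
    unsigned long addr = 0x12345678UL;

    for (int i = 0; i < 2; i++) {
        unsigned long size = huge_sizes[i];
        unsigned long huge_mask = ~(size - 1);
        /* bits that must be clear in a candidate mapping address */
        unsigned long align_mask = PAGE_MASK & ~huge_mask;

        printf("huge page %10lu: ALIGN(%#lx) = %#lx, align_mask = %#lx\n",
               size, addr, ALIGN(addr, size), align_mask);
    }
    return 0;
}
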
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index a124f19..5521f59 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -168,9 +168,9 @@
return;
/* make sure bounce buffers are shared */
+ swiotlb_force = SWIOTLB_FORCE;
swiotlb_init(1);
swiotlb_update_mem_attributes();
- swiotlb_force = SWIOTLB_FORCE;
}
void __init mem_init(void)
@@ -291,10 +291,8 @@
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn));
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
vmem_remove_mapping(start, size);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 460f255..5182e08 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -101,6 +101,9 @@
pgt_prot = pgprot_val(PAGE_KERNEL_EXEC);
sgt_prot = pgprot_val(SEGMENT_KERNEL_EXEC);
+ /*
+ * The first 1MB of 1:1 mapping is mapped with 4KB pages
+ */
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
@@ -146,30 +149,26 @@
pm_dir = pmd_offset(pu_dir, address);
if (pmd_none(*pm_dir)) {
- if (mode == POPULATE_ZERO_SHADOW &&
- IS_ALIGNED(address, PMD_SIZE) &&
+ if (IS_ALIGNED(address, PMD_SIZE) &&
end - address >= PMD_SIZE) {
- pmd_populate(&init_mm, pm_dir,
- kasan_early_shadow_pte);
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
- /* the first megabyte of 1:1 is mapped with 4k pages */
- if (has_edat && address && end - address >= PMD_SIZE &&
- mode != POPULATE_ZERO_SHADOW) {
- void *page;
+ if (mode == POPULATE_ZERO_SHADOW) {
+ pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte);
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ } else if (has_edat && address) {
+ void *page;
- if (mode == POPULATE_ONE2ONE) {
- page = (void *)address;
- } else {
- page = kasan_early_alloc_segment();
- memset(page, 0, _SEGMENT_SIZE);
+ if (mode == POPULATE_ONE2ONE) {
+ page = (void *)address;
+ } else {
+ page = kasan_early_alloc_segment();
+ memset(page, 0, _SEGMENT_SIZE);
+ }
+ pmd_val(*pm_dir) = __pa(page) | sgt_prot;
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
}
- pmd_val(*pm_dir) = __pa(page) | sgt_prot;
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
}
-
pt_dir = kasan_early_pte_alloc();
pmd_populate(&init_mm, pm_dir, pt_dir);
} else if (pmd_large(*pm_dir)) {
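
Note on the restructured KASAN shadow population loop above: a PMD-sized mapping (a 1 MB segment on s390) is only installed when the current address is segment aligned, a full segment remains, and EDAT is available; otherwise the code falls back to a 4 KB page table. The sketch below walks a small range and reports which path each step would take. It only models the one-to-one/allocation path (not POPULATE_ZERO_SHADOW), the sizes are hard-coded, and it is a control-flow illustration rather than the kernel function.

/* Control-flow sketch of the segment-vs-page decision in the KASAN loop. */
#include <stdio.h>
#include <stdbool.h>

#define PMD_SHIFT   20                  /* 1 MB segments on s390 */
#define PMD_SIZE    (1UL << PMD_SHIFT)
#define PMD_MASK    (~(PMD_SIZE - 1))
#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

int main(void)
{
    unsigned long address = 0x000ff000UL;  /* deliberately unaligned start */
    unsigned long end = 0x00400000UL;      /* 4 MB worth of shadow */
    bool has_edat = true;                  /* assume EDAT is available */

    while (address < end) {
        if (IS_ALIGNED(address, PMD_SIZE) && end - address >= PMD_SIZE &&
            has_edat && address) {
            printf("%#010lx: 1 MB segment mapping\n", address);
            address = (address + PMD_SIZE) & PMD_MASK;
            continue;
        }
        /* the real code would map this partial segment page by page */
        printf("%#010lx: fall back to 4 KB page table\n", address);
        address = (address + PMD_SIZE) & PMD_MASK;
    }
    return 0;
}
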
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 1864a8b..1d17413 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -55,22 +55,29 @@
*/
static DEFINE_SPINLOCK(s390_kernel_write_lock);
-void notrace s390_kernel_write(void *dst, const void *src, size_t size)
+notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
{
+ void *tmp = dst;
unsigned long flags;
long copied;
spin_lock_irqsave(&s390_kernel_write_lock, flags);
- while (size) {
- copied = s390_kernel_write_odd(dst, src, size);
- dst += copied;
- src += copied;
- size -= copied;
+ if (!(flags & PSW_MASK_DAT)) {
+ memcpy(dst, src, size);
+ } else {
+ while (size) {
+ copied = s390_kernel_write_odd(tmp, src, size);
+ tmp += copied;
+ src += copied;
+ size -= copied;
+ }
}
spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
+
+ return dst;
}
-static int __memcpy_real(void *dest, void *src, size_t count)
+static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count)
{
register unsigned long _dest asm("2") = (unsigned long) dest;
register unsigned long _len1 asm("3") = (unsigned long) count;
@@ -91,19 +98,23 @@
return rc;
}
-static unsigned long _memcpy_real(unsigned long dest, unsigned long src,
- unsigned long count)
+static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
+ unsigned long src,
+ unsigned long count)
{
int irqs_disabled, rc;
unsigned long flags;
if (!count)
return 0;
- flags = __arch_local_irq_stnsm(0xf8UL);
+ flags = arch_local_irq_save();
irqs_disabled = arch_irqs_disabled_flags(flags);
if (!irqs_disabled)
trace_hardirqs_off();
+ __arch_local_irq_stnsm(0xf8); // disable DAT
rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
+ if (flags & PSW_MASK_DAT)
+ __arch_local_irq_stosm(0x04); // enable DAT
if (!irqs_disabled)
trace_hardirqs_on();
__arch_local_irq_ssm(flags);
@@ -115,9 +126,15 @@
*/
int memcpy_real(void *dest, void *src, size_t count)
{
- if (S390_lowcore.nodat_stack != 0)
- return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack,
- 3, dest, src, count);
+ int rc;
+
+ if (S390_lowcore.nodat_stack != 0) {
+ preempt_disable();
+ rc = CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, 3,
+ dest, src, count);
+ preempt_enable();
+ return rc;
+ }
/*
* This is a really early memcpy_real call, the stacks are
* not set up yet. Just call _memcpy_real on the early boot
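
Note on s390_kernel_write() above: with DAT enabled it still copies through s390_kernel_write_odd(), which moves at most a small, alignment-limited chunk per call and reports how much it copied, and the function now hands back the original destination like memcpy(). The sketch below shows only that loop shape; write_chunk() is a made-up stand-in for s390_kernel_write_odd() (assumed here to copy up to 8 bytes at a time), not the real helper.

/* Userspace sketch of the chunked-copy loop shape in s390_kernel_write(). */
#include <stdio.h>
#include <string.h>

/* hypothetical helper: copies at most 8 bytes, fewer near the end */
static long write_chunk(void *dst, const void *src, size_t size)
{
    size_t n = size < 8 ? size : 8;

    memcpy(dst, src, n);
    return (long)n;
}

static void *kernel_write_like(void *dst, const void *src, size_t size)
{
    char *d = dst;
    const char *s = src;
    long copied;

    while (size) {
        copied = write_chunk(d, s, size);
        d += copied;
        s += copied;
        size -= copied;
    }
    return dst;  /* hand back the original destination */
}

int main(void)
{
    char buf[32] = { 0 };

    kernel_write_like(buf, "patched text", sizeof("patched text"));
    printf("%s\n", buf);
    return 0;
}
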
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 3dd253f..46071be 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -70,8 +70,20 @@
{
struct mm_struct *mm = arg;
- if (current->active_mm == mm)
- set_user_asce(mm);
+ /* we must change all active ASCEs to avoid the creation of new TLBs */
+ if (current->active_mm == mm) {
+ S390_lowcore.user_asce = mm->context.asce;
+ if (current->thread.mm_segment == USER_DS) {
+ __ctl_load(S390_lowcore.user_asce, 1, 1);
+ /* Mark user-ASCE present in CR1 */
+ clear_cpu_flag(CIF_ASCE_PRIMARY);
+ }
+ if (current->thread.mm_segment == USER_DS_SACF) {
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+ /* enable_sacf_uaccess does all or nothing */
+ WARN_ON(!test_cpu_flag(CIF_ASCE_SECONDARY));
+ }
+ }
__tlb_flush_local();
}
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index b403fa1..f810930 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -415,6 +415,10 @@
SET_MEMORY_RO | SET_MEMORY_X);
__set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
+
+ /* we need lowcore executable for our LPSWE instructions */
+ set_memory_x(0, 1);
+
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}