Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/mm/mmap.c b/mm/mmap.c
index ba78f1f..5c8b448 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -53,6 +53,9 @@
#include <asm/tlb.h>
#include <asm/mmu_context.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/mmap.h>
+
#include "internal.h"
#ifndef arch_mmap_check
@@ -129,7 +132,7 @@
vm_flags &= ~VM_SHARED;
vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
}
- /* remove_protection_ptes reads vma->vm_page_prot without mmap_sem */
+ /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}
@@ -140,7 +143,7 @@
struct file *file, struct address_space *mapping)
{
if (vma->vm_flags & VM_DENYWRITE)
- atomic_inc(&file_inode(file)->i_writecount);
+ allow_write_access(file);
if (vma->vm_flags & VM_SHARED)
mapping_unmap_writable(mapping);
@@ -195,7 +198,7 @@
bool downgraded = false;
LIST_HEAD(uf);
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
origbrk = mm->brk;
@@ -235,14 +238,14 @@
/*
* Always allow shrinking brk.
- * __do_munmap() may downgrade mmap_sem to read.
+ * __do_munmap() may downgrade mmap_lock to read.
*/
if (brk <= mm->brk) {
int ret;
/*
- * mm->brk must to be protected by write mmap_sem so update it
- * before downgrading mmap_sem. When __do_munmap() fails,
+ * mm->brk must be protected by write mmap_lock so update it
+ * before downgrading mmap_lock. When __do_munmap() fails,
* mm->brk will be restored from origbrk.
*/
mm->brk = brk;
@@ -269,9 +272,9 @@
success:
populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
if (downgraded)
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
else
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
if (populate)
mm_populate(oldbrk, newbrk - oldbrk);
@@ -279,7 +282,7 @@
out:
retval = origbrk;
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return retval;
}
@@ -471,8 +474,12 @@
{
/*
* All rb_subtree_gap values must be consistent prior to erase,
- * with the possible exception of the "next" vma being erased if
- * next->vm_start was reduced.
+ * with the possible exception of
+ *
+ * a. the "next" vma being erased if next->vm_start was reduced in
+ * __vma_adjust() -> __vma_unlink()
+ * b. the vma being erased in detach_vmas_to_be_unmapped() ->
+ * vma_rb_erase()
*/
validate_mm_rb(root, ignore);
@@ -482,13 +489,7 @@
static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
struct rb_root *root)
{
- /*
- * All rb_subtree_gap values must be consistent prior to erase,
- * with the possible exception of the vma being erased.
- */
- validate_mm_rb(root, vma);
-
- __vma_rb_erase(vma, root);
+ vma_rb_erase_ignore(vma, root, vma);
}
/*
@@ -502,7 +503,7 @@
* After the update, the vma will be reinserted using
* anon_vma_interval_tree_post_update_vma().
*
- * The entire update must be protected by exclusive mmap_sem and by
+ * The entire update must be protected by exclusive mmap_lock and by
* the root anon_vma's mutex.
*/
static inline void
@@ -557,6 +558,50 @@
return 0;
}
+/*
+ * vma_next() - Get the next VMA.
+ * @mm: The mm_struct.
+ * @vma: The current vma.
+ *
+ * If @vma is NULL, return the first vma in the mm.
+ *
+ * Returns: The next VMA after @vma.
+ */
+static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
+ struct vm_area_struct *vma)
+{
+ if (!vma)
+ return mm->mmap;
+
+ return vma->vm_next;
+}
+
+/*
+ * munmap_vma_range() - munmap VMAs that overlap a range.
+ * @mm: The mm struct
+ * @start: The start of the range.
+ * @len: The length of the range.
+ * @pprev: pointer to the pointer that will be set to previous vm_area_struct
+ * @link: the rb_node
+ * @parent: the parent rb_node
+ *
+ * Find all the vm_area_struct that overlap from @start to @start + @len and
+ * munmap them (passing @uf to do_munmap()). Set @pprev to the previous vma.
+ *
+ * Returns: -ENOMEM on munmap failure or 0 on success.
+ */
+static inline int
+munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
+ struct vm_area_struct **pprev, struct rb_node ***link,
+ struct rb_node **parent, struct list_head *uf)
+{
+
+ while (find_vma_links(mm, start, start + len, pprev, link, parent))
+ if (do_munmap(mm, start, len, uf))
+ return -ENOMEM;
+
+ return 0;
+}
static unsigned long count_vma_pages_range(struct mm_struct *mm,
unsigned long addr, unsigned long end)
{
@@ -618,9 +663,9 @@
struct address_space *mapping = file->f_mapping;
if (vma->vm_flags & VM_DENYWRITE)
- atomic_dec(&file_inode(file)->i_writecount);
+ put_write_access(file_inode(file));
if (vma->vm_flags & VM_SHARED)
- atomic_inc(&mapping->i_mmap_writable);
+ mapping_allow_writable(mapping);
flush_dcache_mmap_lock(mapping);
vma_interval_tree_insert(vma, &mapping->i_mmap);
@@ -633,7 +678,7 @@
struct vm_area_struct *prev, struct rb_node **rb_link,
struct rb_node *rb_parent)
{
- __vma_link_list(mm, vma, prev, rb_parent);
+ __vma_link_list(mm, vma, prev);
__vma_link_rb(mm, vma, rb_link, rb_parent);
}
@@ -674,39 +719,16 @@
mm->map_count++;
}
-static __always_inline void __vma_unlink_common(struct mm_struct *mm,
+static __always_inline void __vma_unlink(struct mm_struct *mm,
struct vm_area_struct *vma,
- struct vm_area_struct *prev,
- bool has_prev,
struct vm_area_struct *ignore)
{
- struct vm_area_struct *next;
-
vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
- next = vma->vm_next;
- if (has_prev)
- prev->vm_next = next;
- else {
- prev = vma->vm_prev;
- if (prev)
- prev->vm_next = next;
- else
- mm->mmap = next;
- }
- if (next)
- next->vm_prev = prev;
-
+ __vma_unlink_list(mm, vma);
/* Kill the cache */
vmacache_invalidate(mm);
}
-static inline void __vma_unlink_prev(struct mm_struct *mm,
- struct vm_area_struct *vma,
- struct vm_area_struct *prev)
-{
- __vma_unlink_common(mm, vma, prev, true, vma);
-}
-
/*
* We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
* is already present in an i_mmap tree without adjusting the tree.
@@ -761,8 +783,6 @@
remove_next = 1 + (end > next->vm_end);
VM_WARN_ON(remove_next == 2 &&
end != next->vm_next->vm_end);
- VM_WARN_ON(remove_next == 1 &&
- end != next->vm_end);
/* trim end to next, for case 6 first pass */
end = next->vm_end;
}
@@ -782,7 +802,7 @@
* vma expands, overlapping part of the next:
* mprotect case 5 shifting the boundary up.
*/
- adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
+ adjust_next = (end - next->vm_start);
exporter = next;
importer = vma;
VM_WARN_ON(expand != importer);
@@ -792,7 +812,7 @@
* split_vma inserting another: so it must be
* mprotect case 4 shifting the boundary down.
*/
- adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
+ adjust_next = -(vma->vm_end - end);
exporter = vma;
importer = next;
VM_WARN_ON(expand != importer);
@@ -847,7 +867,7 @@
anon_vma_interval_tree_pre_update_vma(next);
}
- if (root) {
+ if (file) {
flush_dcache_mmap_lock(mapping);
vma_interval_tree_remove(vma, root);
if (adjust_next)
@@ -864,11 +884,11 @@
}
vma->vm_pgoff = pgoff;
if (adjust_next) {
- next->vm_start += adjust_next << PAGE_SHIFT;
- next->vm_pgoff += adjust_next;
+ next->vm_start += adjust_next;
+ next->vm_pgoff += adjust_next >> PAGE_SHIFT;
}
- if (root) {
+ if (file) {
if (adjust_next)
vma_interval_tree_insert(next, root);
vma_interval_tree_insert(vma, root);
@@ -881,7 +901,7 @@
* us to remove next before dropping the locks.
*/
if (remove_next != 3)
- __vma_unlink_prev(mm, next, vma);
+ __vma_unlink(mm, next, next);
else
/*
* vma is not before next if they've been
@@ -892,7 +912,7 @@
* "next" (which is stored in post-swap()
* "vma").
*/
- __vma_unlink_common(mm, next, NULL, false, vma);
+ __vma_unlink(mm, next, vma);
if (file)
__remove_shared_vm_struct(next, file, mapping);
} else if (insert) {
@@ -919,10 +939,9 @@
anon_vma_interval_tree_post_update_vma(next);
anon_vma_unlock_write(anon_vma);
}
- if (mapping)
- i_mmap_unlock_write(mapping);
- if (root) {
+ if (file) {
+ i_mmap_unlock_write(mapping);
uprobe_mmap(vma);
if (adjust_next)
@@ -1052,7 +1071,7 @@
* anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
*
* We don't check here for the merged mmap wrapping around the end of pagecache
- * indices (16TB on ia32) because do_mmap_pgoff() does not permit mmap's which
+ * indices (16TB on ia32) because do_mmap() does not permit mmap's which
* wrap, nor mmaps which cover the final page at index -1UL.
*/
static int
@@ -1108,15 +1127,18 @@
* the area passed down from mprotect_fixup, never extending beyond one
* vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
*
- * AAAA AAAA AAAA AAAA
- * PPPPPPNNNNNN PPPPPPNNNNNN PPPPPPNNNNNN PPPPNNNNXXXX
- * cannot merge might become might become might become
- * PPNNNNNNNNNN PPPPPPPPPPNN PPPPPPPPPPPP 6 or
- * mmap, brk or case 4 below case 5 below PPPPPPPPXXXX 7 or
- * mremap move: PPPPXXXXXXXX 8
- * AAAA
- * PPPP NNNN PPPPPPPPPPPP PPPPPPPPNNNN PPPPNNNNNNNN
- * might become case 1 below case 2 below case 3 below
+ * AAAA AAAA AAAA
+ * PPPPPPNNNNNN PPPPPPNNNNNN PPPPPPNNNNNN
+ * cannot merge might become might become
+ * PPNNNNNNNNNN PPPPPPPPPPNN
+ * mmap, brk or case 4 below case 5 below
+ * mremap move:
+ * AAAA AAAA
+ * PPPP NNNN PPPPNNNNXXXX
+ * might become might become
+ * PPPPPPPPPPPP 1 or PPPPPPPPPPPP 6 or
+ * PPPPPPPPNNNN 2 or PPPPPPPPXXXX 7 or
+ * PPPPNNNNNNNN 3 PPPPXXXXXXXX 8
*
* It is important for case 8 that the vma NNNN overlapping the
* region AAAA is never going to extended over XXXX. Instead XXXX must
@@ -1150,10 +1172,7 @@
if (vm_flags & VM_SPECIAL)
return NULL;
- if (prev)
- next = prev->vm_next;
- else
- next = mm->mmap;
+ next = vma_next(mm, prev);
area = next;
if (area && area->vm_end == end) /* cases 6, 7, 8 */
next = next->vm_next;
@@ -1226,7 +1245,7 @@
}
/*
- * Rough compatbility check to quickly see if it's even worth looking
+ * Rough compatibility check to quickly see if it's even worth looking
* at sharing an anon_vma.
*
* They need to have the same vm_file, and the flags can only differ
@@ -1243,7 +1262,7 @@
return a->vm_end == b->vm_start &&
mpol_equal(vma_policy(a), vma_policy(b)) &&
a->vm_file == b->vm_file &&
- !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
+ !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) &&
b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
}
@@ -1290,26 +1309,22 @@
*/
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
{
- struct anon_vma *anon_vma;
- struct vm_area_struct *near;
+ struct anon_vma *anon_vma = NULL;
- near = vma->vm_next;
- if (!near)
- goto try_prev;
+ /* Try next first. */
+ if (vma->vm_next) {
+ anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
+ if (anon_vma)
+ return anon_vma;
+ }
- anon_vma = reusable_anon_vma(near, vma, near);
- if (anon_vma)
- return anon_vma;
-try_prev:
- near = vma->vm_prev;
- if (!near)
- goto none;
+ /* Then try prev. */
+ if (vma->vm_prev)
+ anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
- anon_vma = reusable_anon_vma(near, near, vma);
- if (anon_vma)
- return anon_vma;
-none:
/*
+ * We might reach here with anon_vma == NULL if we can't find
+ * any reusable anon_vma.
* There's no absolute need to look only at touching neighbours:
* we could search further afield for "compatible" anon_vmas.
* But it would probably just be a waste of time searching,
@@ -1317,7 +1332,7 @@
* We're trying to allow mprotect remerging later on,
* not trying to minimize memory used for anon_vmas.
*/
- return NULL;
+ return anon_vma;
}
/*
@@ -1384,15 +1399,15 @@
}
/*
- * The caller must hold down_write(¤t->mm->mmap_sem).
+ * The caller must write-lock current->mm->mmap_lock.
*/
unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
- unsigned long flags, vm_flags_t vm_flags,
- unsigned long pgoff, unsigned long *populate,
- struct list_head *uf)
+ unsigned long flags, unsigned long pgoff,
+ unsigned long *populate, struct list_head *uf)
{
struct mm_struct *mm = current->mm;
+ vm_flags_t vm_flags;
int pkey = 0;
*populate = 0;
@@ -1434,7 +1449,7 @@
* that it represents a valid section of the address space.
*/
addr = get_unmapped_area(file, addr, len, pgoff, flags);
- if (offset_in_page(addr))
+ if (IS_ERR_VALUE(addr))
return addr;
if (flags & MAP_FIXED_NOREPLACE) {
@@ -1454,7 +1469,7 @@
* to. we assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
- vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
+ vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_LOCKED)
@@ -1483,7 +1498,7 @@
* with MAP_SHARED to preserve backward compatibility.
*/
flags &= LEGACY_MAP_MASK;
- /* fall through */
+ fallthrough;
case MAP_SHARED_VALIDATE:
if (flags & ~flags_mask)
return -EOPNOTSUPP;
@@ -1510,8 +1525,7 @@
vm_flags |= VM_SHARED | VM_MAYSHARE;
if (!(file->f_mode & FMODE_WRITE))
vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
-
- /* fall through */
+ fallthrough;
case MAP_PRIVATE:
if (!(file->f_mode & FMODE_READ))
return -EACCES;
@@ -1586,11 +1600,12 @@
file = fget(fd);
if (!file)
return -EBADF;
- if (is_file_hugepages(file))
+ if (is_file_hugepages(file)) {
len = ALIGN(len, huge_page_size(hstate_file(file)));
- retval = -EINVAL;
- if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
+ } else if (unlikely(flags & MAP_HUGETLB)) {
+ retval = -EINVAL;
goto out_fput;
+ }
} else if (flags & MAP_HUGETLB) {
struct user_struct *user = NULL;
struct hstate *hs;
@@ -1689,7 +1704,7 @@
/* Can the mapping track the dirty pages? */
return vma->vm_file && vma->vm_file->f_mapping &&
- mapping_cap_account_dirty(vma->vm_file->f_mapping);
+ mapping_can_writeback(vma->vm_file->f_mapping);
}
/*
@@ -1713,7 +1728,7 @@
struct list_head *uf)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma, *prev;
+ struct vm_area_struct *vma, *prev, *merge;
int error;
struct rb_node **rb_link, *rb_parent;
unsigned long charged = 0;
@@ -1733,13 +1748,9 @@
return -ENOMEM;
}
- /* Clear old maps */
- while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
- &rb_parent)) {
- if (do_munmap(mm, addr, len, uf))
- return -ENOMEM;
- }
-
+ /* Clear old maps, set up prev, rb_link, rb_parent, and uf */
+ if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
+ return -ENOMEM;
/*
* Private writable mapping: check memory availability
*/
@@ -1807,6 +1818,27 @@
WARN_ON_ONCE(addr != vma->vm_start);
addr = vma->vm_start;
+
+ /* If vm_flags changed after call_mmap(), try to merge the vma again
+ * as we may succeed this time.
+ */
+ if (unlikely(vm_flags != vma->vm_flags && prev)) {
+ merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
+ NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX);
+ if (merge) {
+ /* ->mmap() can change vma->vm_file and fput the original file. So
+ * fput the vma->vm_file here or we would add an extra fput for file
+ * and cause general protection fault ultimately.
+ */
+ fput(vma->vm_file);
+ vm_area_free(vma);
+ vma = merge;
+ /* Update vm_flags to pick up the change. */
+ vm_flags = vma->vm_flags;
+ goto unmap_writable;
+ }
+ }
+
vm_flags = vma->vm_flags;
} else if (vm_flags & VM_SHARED) {
error = shmem_zero_setup(vma);
@@ -1816,9 +1848,19 @@
vma_set_anonymous(vma);
}
+ /* Allow architectures to sanity-check the vm_flags */
+ if (!arch_validate_flags(vma->vm_flags)) {
+ error = -EINVAL;
+ if (file)
+ goto unmap_and_free_vma;
+ else
+ goto free_vma;
+ }
+
vma_link(mm, vma, prev, rb_link, rb_parent);
/* Once vma denies write, undo our temporary denial count */
if (file) {
+unmap_writable:
if (vm_flags & VM_SHARED)
mapping_unmap_writable(file->f_mapping);
if (vm_flags & VM_DENYWRITE)
@@ -1874,7 +1916,7 @@
return error;
}
-unsigned long unmapped_area(struct vm_unmapped_area_info *info)
+static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
/*
* We implement the search by looking for an rbtree node that
@@ -1977,7 +2019,7 @@
return gap_start;
}
-unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
+static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -2076,6 +2118,27 @@
return gap_end;
}
+/*
+ * Search for an unmapped address range.
+ *
+ * We are looking for a range that:
+ * - does not intersect with any VMA;
+ * - is contained within the [low_limit, high_limit) interval;
+ * - is at least the desired size.
+ * - satisfies (begin_addr & align_mask) == (align_offset & align_mask)
+ */
+unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
+{
+ unsigned long addr;
+
+ if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
+ addr = unmapped_area_topdown(info);
+ else
+ addr = unmapped_area(info);
+
+ trace_vm_unmapped_area(addr, info);
+ return addr;
+}
#ifndef arch_get_mmap_end
#define arch_get_mmap_end(addr) (TASK_SIZE)
@@ -2212,7 +2275,7 @@
/*
* mmap_region() will call shmem_zero_setup() to create a file,
* so use shmem's get_unmapped_area in case it can be huge.
- * do_mmap_pgoff() will clear pgoff, so match alignment.
+ * do_mmap() will clear pgoff, so match alignment.
*/
pgoff = 0;
get_area = shmem_get_unmapped_area;
@@ -2362,8 +2425,7 @@
gap_addr = TASK_SIZE;
next = vma->vm_next;
- if (next && next->vm_start < gap_addr &&
- (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
+ if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
if (!(next->vm_flags & VM_GROWSUP))
return -ENOMEM;
/* Check that both stack segments have the same anon_vma? */
@@ -2375,7 +2437,7 @@
/*
* vma->vm_start/vm_end cannot change under us because the caller
- * is required to hold the mmap_sem in read mode. We need the
+ * is required to hold the mmap_lock in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
*/
anon_vma_lock_write(vma->anon_vma);
@@ -2393,7 +2455,7 @@
if (!error) {
/*
* vma_gap_update() doesn't support concurrent
- * updates, but we only hold a shared mmap_sem
+ * updates, but we only hold a shared mmap_lock
* lock here, so we need to protect against
* concurrent vma expansions.
* anon_vma_lock_write() doesn't help here, as
@@ -2444,7 +2506,7 @@
prev = vma->vm_prev;
/* Check that both stack segments have the same anon_vma? */
if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
- (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
+ vma_is_accessible(prev)) {
if (address - prev->vm_end < stack_guard_gap)
return -ENOMEM;
}
@@ -2455,7 +2517,7 @@
/*
* vma->vm_start/vm_end cannot change under us because the caller
- * is required to hold the mmap_sem in read mode. We need the
+ * is required to hold the mmap_lock in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
*/
anon_vma_lock_write(vma->anon_vma);
@@ -2473,7 +2535,7 @@
if (!error) {
/*
* vma_gap_update() doesn't support concurrent
- * updates, but we only hold a shared mmap_sem
+ * updates, but we only hold a shared mmap_lock
* lock here, so we need to protect against
* concurrent vma expansions.
* anon_vma_lock_write() doesn't help here, as
@@ -2535,7 +2597,7 @@
if (vma && (vma->vm_start <= addr))
return vma;
/* don't alter vm_end if the coredump is running */
- if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
+ if (!prev || expand_stack(prev, addr))
return NULL;
if (prev->vm_flags & VM_LOCKED)
populate_vma_page_range(prev, addr, prev->vm_end, NULL);
@@ -2561,9 +2623,6 @@
return vma;
if (!(vma->vm_flags & VM_GROWSDOWN))
return NULL;
- /* don't alter vm_start if the coredump is running */
- if (!mmget_still_valid(mm))
- return NULL;
start = vma->vm_start;
if (expand_stack(vma, addr))
return NULL;
@@ -2608,7 +2667,7 @@
struct vm_area_struct *vma, struct vm_area_struct *prev,
unsigned long start, unsigned long end)
{
- struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
+ struct vm_area_struct *next = vma_next(mm, prev);
struct mmu_gather tlb;
lru_add_drain();
@@ -2807,7 +2866,7 @@
if (error)
return error;
}
- vma = prev ? prev->vm_next : mm->mmap;
+ vma = vma_next(mm, prev);
if (unlikely(uf)) {
/*
@@ -2844,7 +2903,7 @@
downgrade = false;
if (downgrade)
- downgrade_write(&mm->mmap_sem);
+ mmap_write_downgrade(mm);
unmap_region(mm, vma, prev, start, end);
@@ -2866,20 +2925,20 @@
struct mm_struct *mm = current->mm;
LIST_HEAD(uf);
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
ret = __do_munmap(mm, start, len, &uf, downgrade);
/*
- * Returning 1 indicates mmap_sem is downgraded.
+ * Returning 1 indicates mmap_lock is downgraded.
* But 1 is not legal return value of vm_munmap() and munmap(), reset
* it to 0 before return.
*/
if (ret == 1) {
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
ret = 0;
} else
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
return ret;
@@ -2927,7 +2986,7 @@
if (pgoff + (size >> PAGE_SHIFT) < pgoff)
return ret;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
vma = find_vma(mm, start);
@@ -2986,11 +3045,11 @@
}
file = get_file(vma->vm_file);
- ret = do_mmap_pgoff(vma->vm_file, start, size,
+ ret = do_mmap(vma->vm_file, start, size,
prot, flags, pgoff, &populate, NULL);
fput(file);
out:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
if (populate)
mm_populate(ret, populate);
if (!IS_ERR_VALUE(ret))
@@ -3010,28 +3069,24 @@
struct rb_node **rb_link, *rb_parent;
pgoff_t pgoff = addr >> PAGE_SHIFT;
int error;
+ unsigned long mapped_addr;
/* Until we need other flags, refuse anything except VM_EXEC. */
if ((flags & (~VM_EXEC)) != 0)
return -EINVAL;
flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
- error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
- if (offset_in_page(error))
- return error;
+ mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+ if (IS_ERR_VALUE(mapped_addr))
+ return mapped_addr;
error = mlock_future_check(mm, mm->def_flags, len);
if (error)
return error;
- /*
- * Clear old maps. this also does some error checking for us
- */
- while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
- &rb_parent)) {
- if (do_munmap(mm, addr, len, uf))
- return -ENOMEM;
- }
+ /* Clear old maps, set up prev, rb_link, rb_parent, and uf */
+ if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
+ return -ENOMEM;
/* Check against address space limits *after* clearing old maps... */
if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
@@ -3089,12 +3144,12 @@
if (!len)
return 0;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
ret = do_brk_flags(addr, len, flags, &uf);
populate = ((mm->def_flags & VM_LOCKED) != 0);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
if (populate && !ret)
mm_populate(addr, len);
@@ -3122,12 +3177,12 @@
/*
* Manually reap the mm to free as much memory as possible.
* Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
- * this mm from further consideration. Taking mm->mmap_sem for
+ * this mm from further consideration. Taking mm->mmap_lock for
* write after setting MMF_OOM_SKIP will guarantee that the oom
- * reaper will not run on this mm again after mmap_sem is
+ * reaper will not run on this mm again after mmap_lock is
* dropped.
*
- * Nothing can be holding mm->mmap_sem here and the above call
+ * Nothing can be holding mm->mmap_lock here and the above call
* to mmu_notifier_release(mm) ensures mmu notifier callbacks in
* __oom_reap_task_mm() will not block.
*
@@ -3138,8 +3193,8 @@
(void)__oom_reap_task_mm(mm);
set_bit(MMF_OOM_SKIP, &mm->flags);
- down_write(&mm->mmap_sem);
- up_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
+ mmap_write_unlock(mm);
}
if (mm->locked_vm) {
@@ -3205,7 +3260,7 @@
* By setting it to reflect the virtual start address of the
* vma, merges and splits can happen in a seamless way, just
* using the existing file pgoff checks and manipulations.
- * Similarly in do_mmap_pgoff and in do_brk.
+ * Similarly in do_mmap and in do_brk_flags.
*/
if (vma_is_anonymous(vma)) {
BUG_ON(vma->anon_vma);
@@ -3368,6 +3423,8 @@
.fault = special_mapping_fault,
.mremap = special_mapping_mremap,
.name = special_mapping_name,
+ /* vDSO code relies on VVAR not being accessible remotely */
+ .access = NULL,
};
static const struct vm_operations_struct legacy_special_mapping_vmops = {
@@ -3451,7 +3508,7 @@
}
/*
- * Called with mm->mmap_sem held for writing.
+ * Called with mm->mmap_lock held for writing.
* Insert a new vma covering the given region, with the given flags.
* Its pages are supplied by the given array of struct page *.
* The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
@@ -3488,7 +3545,7 @@
* The LSB of head.next can't change from under us
* because we hold the mm_all_locks_mutex.
*/
- down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
+ down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);
/*
* We can safely modify head.next after taking the
* anon_vma->root->rwsem. If some other vma in this mm shares
@@ -3518,7 +3575,7 @@
*/
if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
BUG();
- down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
+ down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
}
}
@@ -3527,11 +3584,11 @@
* operations that could ever happen on a certain mm. This includes
* vmtruncate, try_to_unmap, and all page faults.
*
- * The caller must take the mmap_sem in write mode before calling
+ * The caller must take the mmap_lock in write mode before calling
* mm_take_all_locks(). The caller isn't allowed to release the
- * mmap_sem until mm_drop_all_locks() returns.
+ * mmap_lock until mm_drop_all_locks() returns.
*
- * mmap_sem in write mode is required in order to block all operations
+ * mmap_lock in write mode is required in order to block all operations
* that could modify pagetables and free pages without need of
* altering the vma layout. It's also needed in write mode to avoid new
* anon_vmas to be associated with existing vmas.
@@ -3564,7 +3621,7 @@
struct vm_area_struct *vma;
struct anon_vma_chain *avc;
- BUG_ON(down_read_trylock(&mm->mmap_sem));
+ BUG_ON(mmap_read_trylock(mm));
mutex_lock(&mm_all_locks_mutex);
@@ -3636,7 +3693,7 @@
}
/*
- * The mmap_sem cannot be released by the caller until
+ * The mmap_lock cannot be released by the caller until
* mm_drop_all_locks() returns.
*/
void mm_drop_all_locks(struct mm_struct *mm)
@@ -3644,7 +3701,7 @@
struct vm_area_struct *vma;
struct anon_vma_chain *avc;
- BUG_ON(down_read_trylock(&mm->mmap_sem));
+ BUG_ON(mmap_read_trylock(mm));
BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
for (vma = mm->mmap; vma; vma = vma->vm_next) {