Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/mm/util.c b/mm/util.c
index 9e3ebd2..3ad6db9 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
@@ -6,6 +7,7 @@
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/security.h>
#include <linux/swap.h>
@@ -14,18 +16,18 @@
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/userfaultfd_k.h>
+#include <linux/elf.h>
+#include <linux/elf-randomize.h>
+#include <linux/personality.h>
+#include <linux/random.h>
+#include <linux/processor.h>
+#include <linux/sizes.h>
+#include <linux/compat.h>
-#include <asm/sections.h>
#include <linux/uaccess.h>
#include "internal.h"
-static inline int is_kernel_rodata(unsigned long addr)
-{
- return addr >= (unsigned long)__start_rodata &&
- addr < (unsigned long)__end_rodata;
-}
-
/**
* kfree_const - conditionally free memory
* @x: pointer to the memory
@@ -43,6 +45,8 @@
* kstrdup - allocate space for and copy an existing string
* @s: the string to duplicate
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Return: newly allocated copy of @s or %NULL in case of error
*/
char *kstrdup(const char *s, gfp_t gfp)
{
@@ -65,9 +69,10 @@
* @s: the string to duplicate
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
*
- * Function returns source string if it is in .rodata section otherwise it
- * fallbacks to kstrdup.
- * Strings allocated by kstrdup_const should be freed by kfree_const.
+ * Note: Strings allocated by kstrdup_const should be freed by kfree_const.
+ *
+ * Return: source string if it is in .rodata section otherwise
+ * fallback to kstrdup.
*/
const char *kstrdup_const(const char *s, gfp_t gfp)
{
@@ -85,6 +90,8 @@
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
*
* Note: Use kmemdup_nul() instead if the size is known exactly.
+ *
+ * Return: newly allocated copy of @s or %NULL in case of error
*/
char *kstrndup(const char *s, size_t max, gfp_t gfp)
{
@@ -110,6 +117,8 @@
* @src: memory region to duplicate
* @len: memory region length
* @gfp: GFP mask to use
+ *
+ * Return: newly allocated copy of @src or %NULL in case of error
*/
void *kmemdup(const void *src, size_t len, gfp_t gfp)
{
@@ -127,6 +136,9 @@
* @s: The data to stringify
* @len: The size of the data
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Return: newly allocated copy of @s with NUL-termination or %NULL in
+ * case of error
*/
char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
{
@@ -150,14 +162,14 @@
* @src: source address in user space
* @len: number of bytes to copy
*
- * Returns an ERR_PTR() on failure. Result is physically
+ * Return: an ERR_PTR() on failure. Result is physically
* contiguous, to be freed by kfree().
*/
void *memdup_user(const void __user *src, size_t len)
{
void *p;
- p = kmalloc_track_caller(len, GFP_USER);
+ p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -176,7 +188,7 @@
* @src: source address in user space
* @len: number of bytes to copy
*
- * Returns an ERR_PTR() on failure. Result may be not
+ * Return: an ERR_PTR() on failure. Result may be not
* physically contiguous. Use kvfree() to free.
*/
void *vmemdup_user(const void __user *src, size_t len)
@@ -200,6 +212,8 @@
* strndup_user - duplicate an existing string from user space
* @s: The string to duplicate
* @n: Maximum number of bytes to copy, including the trailing NUL.
+ *
+ * Return: newly allocated copy of @s or an ERR_PTR() in case of error
*/
char *strndup_user(const char __user *s, long n)
{
@@ -231,7 +245,7 @@
* @src: source address in user space
* @len: number of bytes to copy
*
- * Returns an ERR_PTR() on failure.
+ * Return: an ERR_PTR() on failure.
*/
void *memdup_user_nul(const void __user *src, size_t len)
{
@@ -286,7 +300,105 @@
return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
}
-#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
+#ifndef STACK_RND_MASK
+#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
+#endif
+
+unsigned long randomize_stack_top(unsigned long stack_top)
+{
+ unsigned long random_variable = 0;
+
+ if (current->flags & PF_RANDOMIZE) {
+ random_variable = get_random_long();
+ random_variable &= STACK_RND_MASK;
+ random_variable <<= PAGE_SHIFT;
+ }
+#ifdef CONFIG_STACK_GROWSUP
+ return PAGE_ALIGN(stack_top) + random_variable;
+#else
+ return PAGE_ALIGN(stack_top) - random_variable;
+#endif
+}
+
+#ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ /* Is the current task 32bit ? */
+ if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task())
+ return randomize_page(mm->brk, SZ_32M);
+
+ return randomize_page(mm->brk, SZ_1G);
+}
+
+unsigned long arch_mmap_rnd(void)
+{
+ unsigned long rnd;
+
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+ if (is_compat_task())
+ rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
+ else
+#endif /* CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS */
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
+
+ return rnd << PAGE_SHIFT;
+}
+
+static int mmap_is_legacy(struct rlimit *rlim_stack)
+{
+ if (current->personality & ADDR_COMPAT_LAYOUT)
+ return 1;
+
+ if (rlim_stack->rlim_cur == RLIM_INFINITY)
+ return 1;
+
+ return sysctl_legacy_va_layout;
+}
+
+/*
+ * Leave enough space between the mmap area and the stack to honour ulimit in
+ * the face of randomisation.
+ */
+#define MIN_GAP (SZ_128M)
+#define MAX_GAP (STACK_TOP / 6 * 5)
+
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
+{
+ unsigned long gap = rlim_stack->rlim_cur;
+ unsigned long pad = stack_guard_gap;
+
+ /* Account for stack randomization if necessary */
+ if (current->flags & PF_RANDOMIZE)
+ pad += (STACK_RND_MASK << PAGE_SHIFT);
+
+ /* Values close to RLIM_INFINITY can overflow. */
+ if (gap + pad > gap)
+ gap += pad;
+
+ if (gap < MIN_GAP)
+ gap = MIN_GAP;
+ else if (gap > MAX_GAP)
+ gap = MAX_GAP;
+
+ return PAGE_ALIGN(STACK_TOP - gap - rnd);
+}
+
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
+{
+ unsigned long random_factor = 0UL;
+
+ if (current->flags & PF_RANDOMIZE)
+ random_factor = arch_mmap_rnd();
+
+ if (mmap_is_legacy(rlim_stack)) {
+ mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ } else {
+ mm->mmap_base = mmap_base(random_factor, rlim_stack);
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ }
+}
+#elif defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
mm->mmap_base = TASK_UNMAPPED_BASE;
@@ -294,52 +406,79 @@
}
#endif
-/*
- * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
- * back to the regular GUP.
- * Note a difference with get_user_pages_fast: this always returns the
- * number of pages pinned, 0 if no pages were pinned.
- * If the architecture does not support this function, simply return with no
- * pages pinned.
+/**
+ * __account_locked_vm - account locked pages to an mm's locked_vm
+ * @mm: mm to account against
+ * @pages: number of pages to account
+ * @inc: %true if @pages should be considered positive, %false if not
+ * @task: task used to check RLIMIT_MEMLOCK
+ * @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped
+ *
+ * Assumes @task and @mm are valid (i.e. at least one reference on each), and
+ * that mmap_sem is held as writer.
+ *
+ * Return:
+ * * 0 on success
+ * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded.
*/
-int __weak __get_user_pages_fast(unsigned long start,
- int nr_pages, int write, struct page **pages)
+int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
+ struct task_struct *task, bool bypass_rlim)
{
- return 0;
+ unsigned long locked_vm, limit;
+ int ret = 0;
+
+ lockdep_assert_held_write(&mm->mmap_sem);
+
+ locked_vm = mm->locked_vm;
+ if (inc) {
+ if (!bypass_rlim) {
+ limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if (locked_vm + pages > limit)
+ ret = -ENOMEM;
+ }
+ if (!ret)
+ mm->locked_vm = locked_vm + pages;
+ } else {
+ WARN_ON_ONCE(pages > locked_vm);
+ mm->locked_vm = locked_vm - pages;
+ }
+
+ pr_debug("%s: [%d] caller %ps %c%lu %lu/%lu%s\n", __func__, task->pid,
+ (void *)_RET_IP_, (inc) ? '+' : '-', pages << PAGE_SHIFT,
+ locked_vm << PAGE_SHIFT, task_rlimit(task, RLIMIT_MEMLOCK),
+ ret ? " - exceeded" : "");
+
+ return ret;
}
-EXPORT_SYMBOL_GPL(__get_user_pages_fast);
+EXPORT_SYMBOL_GPL(__account_locked_vm);
/**
- * get_user_pages_fast() - pin user pages in memory
- * @start: starting user address
- * @nr_pages: number of pages from start to pin
- * @write: whether pages will be written to
- * @pages: array that receives pointers to the pages pinned.
- * Should be at least nr_pages long.
+ * account_locked_vm - account locked pages to an mm's locked_vm
+ * @mm: mm to account against, may be NULL
+ * @pages: number of pages to account
+ * @inc: %true if @pages should be considered positive, %false if not
*
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno.
+ * Assumes a non-NULL @mm is valid (i.e. at least one reference on it).
*
- * get_user_pages_fast provides equivalent functionality to get_user_pages,
- * operating on current and current->mm, with force=0 and vma=NULL. However
- * unlike get_user_pages, it must be called without mmap_sem held.
- *
- * get_user_pages_fast may take mmap_sem and page table locks, so no
- * assumptions can be made about lack of locking. get_user_pages_fast is to be
- * implemented in a way that is advantageous (vs get_user_pages()) when the
- * user memory area is already faulted in and present in ptes. However if the
- * pages have to be faulted in, it may turn out to be slightly slower so
- * callers need to carefully consider what to use. On many architectures,
- * get_user_pages_fast simply falls back to get_user_pages.
+ * Return:
+ * * 0 on success, or if mm is NULL
+ * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded.
*/
-int __weak get_user_pages_fast(unsigned long start,
- int nr_pages, int write, struct page **pages)
+int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc)
{
- return get_user_pages_unlocked(start, nr_pages, pages,
- write ? FOLL_WRITE : 0);
+ int ret;
+
+ if (pages == 0 || !mm)
+ return 0;
+
+ down_write(&mm->mmap_sem);
+ ret = __account_locked_vm(mm, pages, inc, current,
+ capable(CAP_IPC_LOCK));
+ up_write(&mm->mmap_sem);
+
+ return ret;
}
-EXPORT_SYMBOL_GPL(get_user_pages_fast);
+EXPORT_SYMBOL_GPL(account_locked_vm);
unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
@@ -393,6 +532,8 @@
*
* Please note that any use of gfp flags outside of GFP_KERNEL is careful to not
* fall back to vmalloc.
+ *
+ * Return: pointer to the allocated memory of %NULL in case of failure
*/
void *kvmalloc_node(size_t size, gfp_t flags, int node)
{
@@ -442,7 +583,7 @@
* It is slightly more efficient to use kfree() or vfree() if you are certain
* that you know which one to use.
*
- * Context: Any context except NMI.
+ * Context: Either preemptible task context or not-NMI interrupt.
*/
void kvfree(const void *addr)
{
@@ -485,7 +626,7 @@
return true;
if (PageHuge(page))
return false;
- for (i = 0; i < hpage_nr_pages(page); i++) {
+ for (i = 0; i < compound_nr(page); i++) {
if (atomic_read(&page[i]._mapcount) >= 0)
return true;
}
@@ -600,7 +741,7 @@
if (sysctl_overcommit_kbytes)
allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
else
- allowed = ((totalram_pages - hugetlb_total_pages())
+ allowed = ((totalram_pages() - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100);
allowed += total_swap_pages;
@@ -645,7 +786,7 @@
*/
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
- long free, allowed, reserve;
+ long allowed;
VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
-(s64)vm_committed_as_batch * num_online_cpus(),
@@ -660,52 +801,9 @@
return 0;
if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
- free = global_zone_page_state(NR_FREE_PAGES);
- free += global_node_page_state(NR_FILE_PAGES);
-
- /*
- * shmem pages shouldn't be counted as free in this
- * case, they can't be purged, only swapped out, and
- * that won't affect the overall amount of available
- * memory in the system.
- */
- free -= global_node_page_state(NR_SHMEM);
-
- free += get_nr_swap_pages();
-
- /*
- * Any slabs which are created with the
- * SLAB_RECLAIM_ACCOUNT flag claim to have contents
- * which are reclaimable, under pressure. The dentry
- * cache and most inode caches should fall into this
- */
- free += global_node_page_state(NR_SLAB_RECLAIMABLE);
-
- /*
- * Part of the kernel memory, which can be released
- * under memory pressure.
- */
- free += global_node_page_state(
- NR_INDIRECTLY_RECLAIMABLE_BYTES) >> PAGE_SHIFT;
-
- /*
- * Leave reserved pages. The pages are not for anonymous pages.
- */
- if (free <= totalreserve_pages)
+ if (pages > totalram_pages() + total_swap_pages)
goto error;
- else
- free -= totalreserve_pages;
-
- /*
- * Reserve some for root
- */
- if (!cap_sys_admin)
- free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
-
- if (free > pages)
- return 0;
-
- goto error;
+ return 0;
}
allowed = vm_commit_limit();
@@ -719,7 +817,8 @@
* Don't let a single process grow so big a user can't recover
*/
if (mm) {
- reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
+ long reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
+
allowed -= min_t(long, mm->total_vm / 32, reserve);
}
@@ -737,7 +836,8 @@
* @buffer: the buffer to copy to.
* @buflen: the length of the buffer. Larger cmdline values are truncated
* to this length.
- * Returns the size of the cmdline field copied. Note that the copy does
+ *
+ * Return: the size of the cmdline field copied. Note that the copy does
* not guarantee an ending NULL byte.
*/
int get_cmdline(struct task_struct *task, char *buffer, int buflen)
@@ -751,12 +851,12 @@
if (!mm->arg_end)
goto out_mm; /* Shh! No looking before we're done */
- down_read(&mm->mmap_sem);
+ spin_lock(&mm->arg_lock);
arg_start = mm->arg_start;
arg_end = mm->arg_end;
env_start = mm->env_start;
env_end = mm->env_end;
- up_read(&mm->mmap_sem);
+ spin_unlock(&mm->arg_lock);
len = arg_end - arg_start;
@@ -788,3 +888,16 @@
out:
return res;
}
+
+int memcmp_pages(struct page *page1, struct page *page2)
+{
+ char *addr1, *addr2;
+ int ret;
+
+ addr1 = kmap_atomic(page1);
+ addr2 = kmap_atomic(page2);
+ ret = memcmp(addr1, addr2, PAGE_SIZE);
+ kunmap_atomic(addr2);
+ kunmap_atomic(addr1);
+ return ret;
+}