Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 149b6f4..e08c941 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Simple NUMA memory policy for the Linux kernel.
*
* Copyright 2003,2004 Andi Kleen, SuSE Labs.
* (C) Copyright 2005 Christoph Lameter, Silicon Graphics, Inc.
- * Subject to the GNU Public License, version 2.
*
* NUMA policy allows the user to give hints in which node(s) memory should
* be allocated.
@@ -68,7 +68,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/mempolicy.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/kernel.h>
@@ -306,7 +306,7 @@
else {
nodes_remap(tmp, pol->v.nodes,pol->w.cpuset_mems_allowed,
*nodes);
- pol->w.cpuset_mems_allowed = tmp;
+ pol->w.cpuset_mems_allowed = *nodes;
}
if (nodes_empty(tmp))
@@ -350,7 +350,7 @@
{
if (!pol)
return;
- if (!mpol_store_user_nodemask(pol) &&
+ if (!mpol_store_user_nodemask(pol) && !(pol->flags & MPOL_F_LOCAL) &&
nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
return;
@@ -403,7 +403,7 @@
},
};
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
unsigned long flags);
struct queue_pages {
@@ -428,6 +428,16 @@
return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
}
+/*
+ * queue_pages_pmd() has four possible return values:
+ * 0 - pages are placed on the right node or queued successfully.
+ * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ * specified.
+ * 2 - THP was split.
+ * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
+ * existing page was already on a node that does not follow the
+ * policy.
+ */
static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
@@ -437,25 +447,29 @@
unsigned long flags;
if (unlikely(is_pmd_migration_entry(*pmd))) {
- ret = 1;
+ ret = -EIO;
goto unlock;
}
page = pmd_page(*pmd);
if (is_huge_zero_page(page)) {
spin_unlock(ptl);
__split_huge_pmd(walk->vma, pmd, addr, false, NULL);
+ ret = 2;
goto out;
}
- if (!queue_pages_required(page, qp)) {
- ret = 1;
+ if (!queue_pages_required(page, qp))
goto unlock;
- }
- ret = 1;
flags = qp->flags;
/* go to thp migration */
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
- migrate_page_add(page, qp->pagelist, flags);
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ if (!vma_migratable(walk->vma) ||
+ migrate_page_add(page, qp->pagelist, flags)) {
+ ret = 1;
+ goto unlock;
+ }
+ } else
+ ret = -EIO;
unlock:
spin_unlock(ptl);
out:
@@ -465,6 +479,13 @@
/*
* Scan through pages checking if pages follow certain conditions,
* and move them to the pagelist if they do.
+ *
+ * queue_pages_pte_range() has three possible return values:
+ * 0 - pages are placed on the right node or queued successfully.
+ * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ * specified.
+ * -EIO - only MPOL_MF_STRICT was specified and an existing page was already
+ * on a node that does not follow the policy.
*/
static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
@@ -474,15 +495,17 @@
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
int ret;
+ bool has_unmovable = false;
pte_t *pte;
spinlock_t *ptl;
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
- if (ret)
- return 0;
+ if (ret != 2)
+ return ret;
}
+ /* THP was split, fall through to pte walk */
if (pmd_trans_unstable(pmd))
return 0;
@@ -502,11 +525,30 @@
continue;
if (!queue_pages_required(page, qp))
continue;
- migrate_page_add(page, qp->pagelist, flags);
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ /* MPOL_MF_STRICT must be specified if we get here */
+ if (!vma_migratable(vma)) {
+ has_unmovable = true;
+ break;
+ }
+
+ /*
+ * Do not abort immediately since there may be
+ * temporary off LRU pages in the range. Still
+ * need migrate other LRU pages.
+ */
+ if (migrate_page_add(page, qp->pagelist, flags))
+ has_unmovable = true;
+ } else
+ break;
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
- return 0;
+
+ if (has_unmovable)
+ return 1;
+
+ return addr != end ? -EIO : 0;
}
static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
@@ -576,7 +618,12 @@
unsigned long endvma = vma->vm_end;
unsigned long flags = qp->flags;
- if (!vma_migratable(vma))
+ /*
+ * Need check MPOL_MF_STRICT to return -EIO if possible
+ * regardless of vma_migratable
+ */
+ if (!vma_migratable(vma) &&
+ !(flags & MPOL_MF_STRICT))
return 1;
if (endvma > end)
@@ -603,17 +650,31 @@
}
/* queue pages from current vma */
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ if (flags & MPOL_MF_VALID)
return 0;
return 1;
}
+static const struct mm_walk_ops queue_pages_walk_ops = {
+ .hugetlb_entry = queue_pages_hugetlb,
+ .pmd_entry = queue_pages_pte_range,
+ .test_walk = queue_pages_test_walk,
+};
+
/*
* Walk through page tables and collect pages to be migrated.
*
* If pages found in a given range are on a set of nodes (determined by
* @nodes and @flags,) it's isolated and queued to the pagelist which is
- * passed via @private.)
+ * passed via @private.
+ *
+ * queue_pages_range() has three possible return values:
+ * 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ * specified.
+ * 0 - queue pages successfully or no misplaced page.
+ * errno - i.e. misplaced pages with MPOL_MF_STRICT specified (-EIO) or
+ * memory range specified by nodemask and maxnode points outside
+ * your accessible address space (-EFAULT)
*/
static int
queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
@@ -626,15 +687,8 @@
.nmask = nodes,
.prev = NULL,
};
- struct mm_walk queue_pages_walk = {
- .hugetlb_entry = queue_pages_hugetlb,
- .pmd_entry = queue_pages_pte_range,
- .test_walk = queue_pages_test_walk,
- .mm = mm,
- .private = &qp,
- };
- return walk_page_range(start, end, &queue_pages_walk);
+ return walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
}
/*
@@ -797,16 +851,19 @@
}
}
-static int lookup_node(unsigned long addr)
+static int lookup_node(struct mm_struct *mm, unsigned long addr)
{
struct page *p;
int err;
- err = get_user_pages(addr & PAGE_MASK, 1, 0, &p, NULL);
+ int locked = 1;
+ err = get_user_pages_locked(addr & PAGE_MASK, 1, 0, &p, &locked);
if (err >= 0) {
err = page_to_nid(p);
put_page(p);
}
+ if (locked)
+ up_read(&mm->mmap_sem);
return err;
}
@@ -817,7 +874,7 @@
int err;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma = NULL;
- struct mempolicy *pol = current->mempolicy;
+ struct mempolicy *pol = current->mempolicy, *pol_refcount = NULL;
if (flags &
~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED))
@@ -857,7 +914,16 @@
if (flags & MPOL_F_NODE) {
if (flags & MPOL_F_ADDR) {
- err = lookup_node(addr);
+ /*
+ * Take a refcount on the mpol, lookup_node()
+ * wil drop the mmap_sem, so after calling
+ * lookup_node() only "pol" remains valid, "vma"
+ * is stale.
+ */
+ pol_refcount = pol;
+ vma = NULL;
+ mpol_get(pol);
+ err = lookup_node(mm, addr);
if (err < 0)
goto out;
*policy = err;
@@ -892,7 +958,9 @@
out:
mpol_cond_put(pol);
if (vma)
- up_read(¤t->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
+ if (pol_refcount)
+ mpol_put(pol_refcount);
return err;
}
@@ -900,7 +968,7 @@
/*
* page migration, thp tail pages can be passed.
*/
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
unsigned long flags)
{
struct page *head = compound_head(page);
@@ -913,8 +981,19 @@
mod_node_page_state(page_pgdat(head),
NR_ISOLATED_ANON + page_is_file_cache(head),
hpage_nr_pages(head));
+ } else if (flags & MPOL_MF_STRICT) {
+ /*
+ * Non-movable page may reach here. And, there may be
+ * temporary off LRU pages or non-LRU movable pages.
+ * Treat them as unmovable pages since they can't be
+ * isolated, so they can't be moved at the moment. It
+ * should return -EIO for this case too.
+ */
+ return -EIO;
}
}
+
+ return 0;
}
/* page allocation callback for NUMA node migration */
@@ -1117,9 +1196,10 @@
}
#else
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
unsigned long flags)
{
+ return -EIO;
}
int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
@@ -1142,6 +1222,7 @@
struct mempolicy *new;
unsigned long end;
int err;
+ int ret;
LIST_HEAD(pagelist);
if (flags & ~(unsigned long)MPOL_MF_VALID)
@@ -1203,10 +1284,15 @@
if (err)
goto mpol_out;
- err = queue_pages_range(mm, start, end, nmask,
+ ret = queue_pages_range(mm, start, end, nmask,
flags | MPOL_MF_INVERT, &pagelist);
- if (!err)
- err = mbind_range(mm, start, end, new);
+
+ if (ret < 0) {
+ err = ret;
+ goto up_out;
+ }
+
+ err = mbind_range(mm, start, end, new);
if (!err) {
int nr_failed = 0;
@@ -1219,13 +1305,16 @@
putback_movable_pages(&pagelist);
}
- if (nr_failed && (flags & MPOL_MF_STRICT))
+ if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
err = -EIO;
- } else
- putback_movable_pages(&pagelist);
+ } else {
+up_out:
+ if (!list_empty(&pagelist))
+ putback_movable_pages(&pagelist);
+ }
up_write(&mm->mmap_sem);
- mpol_out:
+mpol_out:
mpol_put(new);
return err;
}
@@ -1300,7 +1389,7 @@
nodemask_t *nodes)
{
unsigned long copy = ALIGN(maxnode-1, 64) / 8;
- const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long);
+ unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long);
if (copy > nbytes) {
if (copy > PAGE_SIZE)
@@ -1320,6 +1409,7 @@
int err;
unsigned short mode_flags;
+ start = untagged_addr(start);
mode_flags = mode & MPOL_MODE_FLAGS;
mode &= ~MPOL_MODE_FLAGS;
if (mode >= MPOL_MAX)
@@ -1427,10 +1517,6 @@
if (nodes_empty(*new))
goto out_put;
- nodes_and(*new, *new, node_states[N_MEMORY]);
- if (nodes_empty(*new))
- goto out_put;
-
err = security_task_movememory(task);
if (err)
goto out_put;
@@ -1477,7 +1563,9 @@
int uninitialized_var(pval);
nodemask_t nodes;
- if (nmask != NULL && maxnode < MAX_NUMNODES)
+ addr = untagged_addr(addr);
+
+ if (nmask != NULL && maxnode < nr_node_ids)
return -EINVAL;
err = do_get_mempolicy(&pval, &nodes, addr, flags);
@@ -1513,7 +1601,7 @@
unsigned long nr_bits, alloc_size;
DECLARE_BITMAP(bm, MAX_NUMNODES);
- nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
+ nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids);
alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
if (nmask)
@@ -2039,43 +2127,25 @@
* If the policy is interleave, or does not allow the current
* node in its nodemask, we allocate the standard way.
*/
- if (pol->mode == MPOL_PREFERRED &&
- !(pol->flags & MPOL_F_LOCAL))
+ if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
hpage_node = pol->v.preferred_node;
nmask = policy_nodemask(gfp, pol);
if (!nmask || node_isset(hpage_node, *nmask)) {
mpol_cond_put(pol);
+ page = __alloc_pages_node(hpage_node,
+ gfp | __GFP_THISNODE, order);
+
/*
- * We cannot invoke reclaim if __GFP_THISNODE
- * is set. Invoking reclaim with
- * __GFP_THISNODE set, would cause THP
- * allocations to trigger heavy swapping
- * despite there may be tons of free memory
- * (including potentially plenty of THP
- * already available in the buddy) on all the
- * other NUMA nodes.
- *
- * At most we could invoke compaction when
- * __GFP_THISNODE is set (but we would need to
- * refrain from invoking reclaim even if
- * compaction returned COMPACT_SKIPPED because
- * there wasn't not enough memory to succeed
- * compaction). For now just avoid
- * __GFP_THISNODE instead of limiting the
- * allocation path to a strict and single
- * compaction invocation.
- *
- * Supposedly if direct reclaim was enabled by
- * the caller, the app prefers THP regardless
- * of the node it comes from so this would be
- * more desiderable behavior than only
- * providing THP originated from the local
- * node in such case.
+ * If hugepage allocations are configured to always
+ * synchronous compact or the vma has been madvised
+ * to prefer hugepage backing, retry allowing remote
+ * memory as well.
*/
- if (!(gfp & __GFP_DIRECT_RECLAIM))
- gfp |= __GFP_THISNODE;
- page = __alloc_pages_node(hpage_node, gfp, order);
+ if (!page && (gfp & __GFP_DIRECT_RECLAIM))
+ page = __alloc_pages_node(hpage_node,
+ gfp | __GFP_NORETRY, order);
+
goto out;
}
}
@@ -2087,6 +2157,7 @@
out:
return page;
}
+EXPORT_SYMBOL(alloc_pages_vma);
/**
* alloc_pages_current - Allocate pages.
@@ -2319,7 +2390,7 @@
unsigned long pgoff;
int thiscpu = raw_smp_processor_id();
int thisnid = cpu_to_node(thiscpu);
- int polnid = -1;
+ int polnid = NUMA_NO_NODE;
int ret = -1;
pol = get_vma_policy(vma, addr);
@@ -2725,12 +2796,11 @@
int mpol_parse_str(char *str, struct mempolicy **mpol)
{
struct mempolicy *new = NULL;
- unsigned short mode;
unsigned short mode_flags;
nodemask_t nodes;
char *nodelist = strchr(str, ':');
char *flags = strchr(str, '=');
- int err = 1;
+ int err = 1, mode;
if (nodelist) {
/* NUL-terminate mode or flags string */
@@ -2745,12 +2815,8 @@
if (flags)
*flags++ = '\0'; /* terminate mode string */
- for (mode = 0; mode < MPOL_MAX; mode++) {
- if (!strcmp(str, policy_modes[mode])) {
- break;
- }
- }
- if (mode >= MPOL_MAX)
+ mode = match_string(policy_modes, MPOL_MAX, str);
+ if (mode < 0)
goto out;
switch (mode) {