Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e899460..a2adf95 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -26,6 +26,7 @@
#include <linux/page_ref.h>
#include <linux/memremap.h>
#include <linux/overflow.h>
+#include <linux/sizes.h>
struct mempolicy;
struct anon_vma;
@@ -48,7 +49,32 @@
static inline void set_max_mapnr(unsigned long limit) { }
#endif
-extern unsigned long totalram_pages;
+extern atomic_long_t _totalram_pages;
+static inline unsigned long totalram_pages(void)
+{
+ return (unsigned long)atomic_long_read(&_totalram_pages);
+}
+
+static inline void totalram_pages_inc(void)
+{
+ atomic_long_inc(&_totalram_pages);
+}
+
+static inline void totalram_pages_dec(void)
+{
+ atomic_long_dec(&_totalram_pages);
+}
+
+static inline void totalram_pages_add(long count)
+{
+ atomic_long_add(count, &_totalram_pages);
+}
+
+static inline void totalram_pages_set(long val)
+{
+ atomic_long_set(&_totalram_pages, val);
+}
+
extern void * high_memory;
extern int page_cluster;
@@ -73,6 +99,17 @@
#include <asm/pgtable.h>
#include <asm/processor.h>
+/*
+ * Architectures that support memory tagging (assigning tags to memory regions,
+ * embedding these tags into addresses that point to these memory regions, and
+ * checking that the memory and the pointer tags match on memory accesses)
+ * redefine this macro to strip tags from pointers.
+ * It's defined as noop for arcitectures that don't support memory tagging.
+ */
+#ifndef untagged_addr
+#define untagged_addr(addr) (addr)
+#endif
+
#ifndef __pa_symbol
#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0))
#endif
@@ -98,10 +135,45 @@
/*
* On some architectures it is expensive to call memset() for small sizes.
- * Those architectures should provide their own implementation of "struct page"
- * zeroing by defining this macro in <asm/pgtable.h>.
+ * If an architecture decides to implement their own version of
+ * mm_zero_struct_page they should wrap the defines below in a #ifndef and
+ * define their own version of this macro in <asm/pgtable.h>
*/
-#ifndef mm_zero_struct_page
+#if BITS_PER_LONG == 64
+/* This function must be updated when the size of struct page grows above 80
+ * or reduces below 56. The idea that compiler optimizes out switch()
+ * statement, and only leaves move/store instructions. Also the compiler can
+ * combine write statments if they are both assignments and can be reordered,
+ * this can result in several of the writes here being dropped.
+ */
+#define mm_zero_struct_page(pp) __mm_zero_struct_page(pp)
+static inline void __mm_zero_struct_page(struct page *page)
+{
+ unsigned long *_pp = (void *)page;
+
+ /* Check that struct page is either 56, 64, 72, or 80 bytes */
+ BUILD_BUG_ON(sizeof(struct page) & 7);
+ BUILD_BUG_ON(sizeof(struct page) < 56);
+ BUILD_BUG_ON(sizeof(struct page) > 80);
+
+ switch (sizeof(struct page)) {
+ case 80:
+ _pp[9] = 0; /* fallthrough */
+ case 72:
+ _pp[8] = 0; /* fallthrough */
+ case 64:
+ _pp[7] = 0; /* fallthrough */
+ case 56:
+ _pp[6] = 0;
+ _pp[5] = 0;
+ _pp[4] = 0;
+ _pp[3] = 0;
+ _pp[2] = 0;
+ _pp[1] = 0;
+ _pp[0] = 0;
+ }
+}
+#else
#define mm_zero_struct_page(pp) ((void)memset((pp), 0, sizeof(struct page)))
#endif
@@ -146,6 +218,8 @@
/* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */
#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
+#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
+
/*
* Linux kernel virtual memory manager primitives.
* The idea being to have a "virtual" mm in the same way
@@ -467,16 +541,30 @@
vma->vm_ops = NULL;
}
+static inline bool vma_is_anonymous(struct vm_area_struct *vma)
+{
+ return !vma->vm_ops;
+}
+
+#ifdef CONFIG_SHMEM
+/*
+ * The vma_is_shmem is not inline because it is used only by slow
+ * paths in userfault.
+ */
+bool vma_is_shmem(struct vm_area_struct *vma);
+#else
+static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
+#endif
+
+int vma_is_stack_for_current(struct vm_area_struct *vma);
+
/* flush_tlb_range() takes a vma, not a mm, and can care about flags */
#define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) }
struct mmu_gather;
struct inode;
-#define page_private(page) ((page)->private)
-#define set_page_private(page, v) ((page)->private = (v))
-
-#if !defined(__HAVE_ARCH_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline int pmd_devmap(pmd_t pmd)
{
return 0;
@@ -562,6 +650,11 @@
return false;
#endif
}
+
+#ifndef is_ioremap_addr
+#define is_ioremap_addr(x) is_vmalloc_addr(x)
+#endif
+
#ifdef CONFIG_MMU
extern int is_vmalloc_or_module_addr(const void *x);
#else
@@ -602,11 +695,6 @@
extern void kvfree(const void *addr);
-static inline atomic_t *compound_mapcount_ptr(struct page *page)
-{
- return &page[1].compound_mapcount;
-}
-
static inline int compound_mapcount(struct page *page)
{
VM_BUG_ON_PAGE(!PageCompound(page), page);
@@ -712,6 +800,24 @@
page[1].compound_order = order;
}
+/* Returns the number of pages in this potentially compound page. */
+static inline unsigned long compound_nr(struct page *page)
+{
+ return 1UL << compound_order(page);
+}
+
+/* Returns the number of bytes in this potentially compound page. */
+static inline unsigned long page_size(struct page *page)
+{
+ return PAGE_SIZE << compound_order(page);
+}
+
+/* Returns the number of bits needed for the number of bytes in a page */
+static inline unsigned int page_shift(struct page *page)
+{
+ return PAGE_SHIFT + compound_order(page);
+}
+
void free_compound_page(struct page *page);
#ifdef CONFIG_MMU
@@ -804,6 +910,7 @@
#define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH)
#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH)
#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH)
+#define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
/*
* Define the bit shifts to access each section. For non-existent
@@ -814,6 +921,7 @@
#define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0))
#define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0))
#define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0))
+#define KASAN_TAG_PGSHIFT (KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0))
/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
#ifdef NODE_NOT_IN_PAGE_FLAGS
@@ -836,6 +944,7 @@
#define NODES_MASK ((1UL << NODES_WIDTH) - 1)
#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1)
+#define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1)
#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1)
static inline enum zone_type page_zonenum(const struct page *page)
@@ -848,6 +957,8 @@
{
return page_zonenum(page) == ZONE_DEVICE;
}
+extern void memmap_init_zone_device(struct zone *, unsigned long,
+ unsigned long, struct dev_pagemap *);
#else
static inline bool is_zone_device_page(const struct page *page)
{
@@ -856,8 +967,6 @@
#endif
#ifdef CONFIG_DEV_PAGEMAP_OPS
-void dev_pagemap_get_ops(void);
-void dev_pagemap_put_ops(void);
void __put_devmap_managed_page(struct page *page);
DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
static inline bool put_devmap_managed_page(struct page *page)
@@ -868,7 +977,6 @@
return false;
switch (page->pgmap->type) {
case MEMORY_DEVICE_PRIVATE:
- case MEMORY_DEVICE_PUBLIC:
case MEMORY_DEVICE_FS_DAX:
__put_devmap_managed_page(page);
return true;
@@ -878,42 +986,32 @@
return false;
}
-static inline bool is_device_private_page(const struct page *page)
-{
- return is_zone_device_page(page) &&
- page->pgmap->type == MEMORY_DEVICE_PRIVATE;
-}
-
-static inline bool is_device_public_page(const struct page *page)
-{
- return is_zone_device_page(page) &&
- page->pgmap->type == MEMORY_DEVICE_PUBLIC;
-}
-
#else /* CONFIG_DEV_PAGEMAP_OPS */
-static inline void dev_pagemap_get_ops(void)
-{
-}
-
-static inline void dev_pagemap_put_ops(void)
-{
-}
-
static inline bool put_devmap_managed_page(struct page *page)
{
return false;
}
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
static inline bool is_device_private_page(const struct page *page)
{
- return false;
+ return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
+ IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
+ is_zone_device_page(page) &&
+ page->pgmap->type == MEMORY_DEVICE_PRIVATE;
}
-static inline bool is_device_public_page(const struct page *page)
+static inline bool is_pci_p2pdma_page(const struct page *page)
{
- return false;
+ return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
+ IS_ENABLED(CONFIG_PCI_P2PDMA) &&
+ is_zone_device_page(page) &&
+ page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
}
-#endif /* CONFIG_DEV_PAGEMAP_OPS */
+
+/* 127: arbitrary random number, small enough to assemble well */
+#define page_ref_zero_or_close_to_overflow(page) \
+ ((unsigned int) page_ref_count(page) + 127u <= 127u)
static inline void get_page(struct page *page)
{
@@ -922,10 +1020,19 @@
* Getting a normal page or the head of a compound page
* requires to already have an elevated page->_refcount.
*/
- VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
+ VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
page_ref_inc(page);
}
+static inline __must_check bool try_get_page(struct page *page)
+{
+ page = compound_head(page);
+ if (WARN_ON_ONCE(page_ref_count(page) <= 0))
+ return false;
+ page_ref_inc(page);
+ return true;
+}
+
static inline void put_page(struct page *page)
{
page = compound_head(page);
@@ -943,6 +1050,31 @@
__put_page(page);
}
+/**
+ * put_user_page() - release a gup-pinned page
+ * @page: pointer to page to be released
+ *
+ * Pages that were pinned via get_user_pages*() must be released via
+ * either put_user_page(), or one of the put_user_pages*() routines
+ * below. This is so that eventually, pages that are pinned via
+ * get_user_pages*() can be separately tracked and uniquely handled. In
+ * particular, interactions with RDMA and filesystems need special
+ * handling.
+ *
+ * put_user_page() and put_page() are not interchangeable, despite this early
+ * implementation that makes them look the same. put_user_page() calls must
+ * be perfectly matched up with get_user_page() calls.
+ */
+static inline void put_user_page(struct page *page)
+{
+ put_page(page);
+}
+
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+ bool make_dirty);
+
+void put_user_pages(struct page **pages, unsigned long npages);
+
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
#endif
@@ -1081,6 +1213,32 @@
}
#endif /* CONFIG_NUMA_BALANCING */
+#ifdef CONFIG_KASAN_SW_TAGS
+static inline u8 page_kasan_tag(const struct page *page)
+{
+ return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK;
+}
+
+static inline void page_kasan_tag_set(struct page *page, u8 tag)
+{
+ page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
+ page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
+}
+
+static inline void page_kasan_tag_reset(struct page *page)
+{
+ page_kasan_tag_set(page, 0xff);
+}
+#else
+static inline u8 page_kasan_tag(const struct page *page)
+{
+ return 0xff;
+}
+
+static inline void page_kasan_tag_set(struct page *page, u8 tag) { }
+static inline void page_kasan_tag_reset(struct page *page) { }
+#endif
+
static inline struct zone *page_zone(const struct page *page)
{
return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
@@ -1247,52 +1405,6 @@
}
/*
- * Different kinds of faults, as returned by handle_mm_fault().
- * Used to decide whether a process gets delivered SIGBUS or
- * just gets major/minor fault counters bumped up.
- */
-
-#define VM_FAULT_OOM 0x0001
-#define VM_FAULT_SIGBUS 0x0002
-#define VM_FAULT_MAJOR 0x0004
-#define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */
-#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned small page */
-#define VM_FAULT_HWPOISON_LARGE 0x0020 /* Hit poisoned large page. Index encoded in upper bits */
-#define VM_FAULT_SIGSEGV 0x0040
-
-#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */
-#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */
-#define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */
-#define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */
-#define VM_FAULT_DONE_COW 0x1000 /* ->fault has fully handled COW */
-#define VM_FAULT_NEEDDSYNC 0x2000 /* ->fault did not modify page tables
- * and needs fsync() to complete (for
- * synchronous page faults in DAX) */
-
-#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \
- VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \
- VM_FAULT_FALLBACK)
-
-#define VM_FAULT_RESULT_TRACE \
- { VM_FAULT_OOM, "OOM" }, \
- { VM_FAULT_SIGBUS, "SIGBUS" }, \
- { VM_FAULT_MAJOR, "MAJOR" }, \
- { VM_FAULT_WRITE, "WRITE" }, \
- { VM_FAULT_HWPOISON, "HWPOISON" }, \
- { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \
- { VM_FAULT_SIGSEGV, "SIGSEGV" }, \
- { VM_FAULT_NOPAGE, "NOPAGE" }, \
- { VM_FAULT_LOCKED, "LOCKED" }, \
- { VM_FAULT_RETRY, "RETRY" }, \
- { VM_FAULT_FALLBACK, "FALLBACK" }, \
- { VM_FAULT_DONE_COW, "DONE_COW" }, \
- { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" }
-
-/* Encode hstate index for a hwpoisoned large page */
-#define VM_FAULT_SET_HINDEX(x) ((x) << 12)
-#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf)
-
-/*
* Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
*/
extern void pagefault_out_of_memory(void);
@@ -1307,7 +1419,11 @@
extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
+#ifdef CONFIG_MMU
extern bool can_do_mlock(void);
+#else
+static inline bool can_do_mlock(void) { return false; }
+#endif
extern int user_shm_lock(size_t, struct user_struct *);
extern void user_shm_unlock(size_t, struct user_struct *);
@@ -1320,10 +1436,8 @@
pgoff_t last_index; /* Highest page->index to unmap */
};
-struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
- pte_t pte, bool with_public_device);
-#define vm_normal_page(vma, addr, pte) _vm_normal_page(vma, addr, pte, false)
-
+struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
+ pte_t pte);
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t pmd);
@@ -1334,59 +1448,15 @@
void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long start, unsigned long end);
-/**
- * mm_walk - callbacks for walk_page_range
- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
- * this handler should only handle pud_trans_huge() puds.
- * the pmd_entry or pte_entry callbacks will be used for
- * regular PUDs.
- * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
- * this handler is required to be able to handle
- * pmd_trans_huge() pmds. They may simply choose to
- * split_huge_page() instead of handling it explicitly.
- * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
- * @pte_hole: if set, called for each hole at all levels
- * @hugetlb_entry: if set, called for each hugetlb entry
- * @test_walk: caller specific callback function to determine whether
- * we walk over the current vma or not. Returning 0
- * value means "do page table walk over the current vma,"
- * and a negative one means "abort current page table walk
- * right now." 1 means "skip the current vma."
- * @mm: mm_struct representing the target process of page table walk
- * @vma: vma currently walked (NULL if walking outside vmas)
- * @private: private data for callbacks' usage
- *
- * (see the comment on walk_page_range() for more details)
- */
-struct mm_walk {
- int (*pud_entry)(pud_t *pud, unsigned long addr,
- unsigned long next, struct mm_walk *walk);
- int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
- unsigned long next, struct mm_walk *walk);
- int (*pte_entry)(pte_t *pte, unsigned long addr,
- unsigned long next, struct mm_walk *walk);
- int (*pte_hole)(unsigned long addr, unsigned long next,
- struct mm_walk *walk);
- int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
- unsigned long addr, unsigned long next,
- struct mm_walk *walk);
- int (*test_walk)(unsigned long addr, unsigned long next,
- struct mm_walk *walk);
- struct mm_struct *mm;
- struct vm_area_struct *vma;
- void *private;
-};
+struct mmu_notifier_range;
-int walk_page_range(unsigned long addr, unsigned long end,
- struct mm_walk *walk);
-int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor, unsigned long ceiling);
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma);
int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
- unsigned long *start, unsigned long *end,
- pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
+ struct mmu_notifier_range *range,
+ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
int follow_pfn(struct vm_area_struct *vma, unsigned long address,
unsigned long *pfn);
int follow_phys(struct vm_area_struct *vma, unsigned long address,
@@ -1458,21 +1528,13 @@
unsigned int gup_flags, struct page **pages, int *locked);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
-#ifdef CONFIG_FS_DAX
-long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas);
-#else
-static inline long get_user_pages_longterm(unsigned long start,
- unsigned long nr_pages, unsigned int gup_flags,
- struct page **pages, struct vm_area_struct **vmas)
-{
- return get_user_pages(start, nr_pages, gup_flags, pages, vmas);
-}
-#endif /* CONFIG_FS_DAX */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages);
+int get_user_pages_fast(unsigned long start, int nr_pages,
+ unsigned int gup_flags, struct page **pages);
+
+int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc);
+int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
+ struct task_struct *task, bool bypass_rlim);
/* Container for pinned pfns / pages */
struct frame_vector {
@@ -1547,23 +1609,6 @@
int get_cmdline(struct task_struct *task, char *buffer, int buflen);
-static inline bool vma_is_anonymous(struct vm_area_struct *vma)
-{
- return !vma->vm_ops;
-}
-
-#ifdef CONFIG_SHMEM
-/*
- * The vma_is_shmem is not inline because it is used only by slow
- * paths in userfault.
- */
-bool vma_is_shmem(struct vm_area_struct *vma);
-#else
-static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
-#endif
-
-int vma_is_stack_for_current(struct vm_area_struct *vma);
-
extern unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len,
@@ -1681,7 +1726,7 @@
}
#endif
-#ifndef __HAVE_ARCH_PTE_DEVMAP
+#ifndef CONFIG_ARCH_HAS_PTE_DEVMAP
static inline int pte_devmap(pte_t pte)
{
return 0;
@@ -1797,8 +1842,8 @@
static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
#endif
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
+int __pte_alloc_kernel(pmd_t *pmd);
/*
* The following ifdef needed to get the 4level-fixup.h header to work.
@@ -1880,13 +1925,6 @@
return true;
}
-/* Reset page->mapping so free_pages_check won't complain. */
-static inline void pte_lock_deinit(struct page *page)
-{
- page->mapping = NULL;
- ptlock_free(page);
-}
-
#else /* !USE_SPLIT_PTE_PTLOCKS */
/*
* We use mm->page_table_lock to guard all pagetable pages of the mm.
@@ -1897,7 +1935,7 @@
}
static inline void ptlock_cache_init(void) {}
static inline bool ptlock_init(struct page *page) { return true; }
-static inline void pte_lock_deinit(struct page *page) {}
+static inline void ptlock_free(struct page *page) {}
#endif /* USE_SPLIT_PTE_PTLOCKS */
static inline void pgtable_init(void)
@@ -1906,7 +1944,7 @@
pgtable_cache_init();
}
-static inline bool pgtable_page_ctor(struct page *page)
+static inline bool pgtable_pte_page_ctor(struct page *page)
{
if (!ptlock_init(page))
return false;
@@ -1915,9 +1953,9 @@
return true;
}
-static inline void pgtable_page_dtor(struct page *page)
+static inline void pgtable_pte_page_dtor(struct page *page)
{
- pte_lock_deinit(page);
+ ptlock_free(page);
__ClearPageTable(page);
dec_zone_page_state(page, NR_PAGETABLE);
}
@@ -1936,18 +1974,17 @@
pte_unmap(pte); \
} while (0)
-#define pte_alloc(mm, pmd, address) \
- (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
+#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
#define pte_alloc_map(mm, pmd, address) \
- (pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
+ (pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
- (pte_alloc(mm, pmd, address) ? \
+ (pte_alloc(mm, pmd) ? \
NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
#define pte_alloc_kernel(pmd, address) \
- ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+ ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
NULL: pte_offset_kernel(pmd, address))
#if USE_SPLIT_PMD_PTLOCKS
@@ -2034,7 +2071,7 @@
* Return pages freed into the buddy system.
*/
extern unsigned long free_reserved_area(void *start, void *end,
- int poison, char *s);
+ int poison, const char *s);
#ifdef CONFIG_HIGHMEM
/*
@@ -2151,7 +2188,7 @@
struct mminit_pfnnid_cache *state);
#endif
-#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP)
+#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
void zero_resv_unavail(void);
#else
static inline void zero_resv_unavail(void) {}
@@ -2182,6 +2219,7 @@
/* page_alloc.c */
extern int min_free_kbytes;
+extern int watermark_boost_factor;
extern int watermark_scale_factor;
/* nommu.c */
@@ -2285,6 +2323,8 @@
unsigned long addr, unsigned long len,
unsigned long flags, struct page **pages);
+unsigned long randomize_stack_top(unsigned long stack_top);
+
extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
extern unsigned long mmap_region(struct file *file, unsigned long addr,
@@ -2294,6 +2334,8 @@
unsigned long len, unsigned long prot, unsigned long flags,
vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
struct list_head *uf);
+extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
+ struct list_head *uf, bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
@@ -2374,8 +2416,7 @@
void task_dirty_inc(struct task_struct *tsk);
/* readahead.c */
-#define VM_MAX_READAHEAD 128 /* kbytes */
-#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */
+#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
pgoff_t offset, unsigned long nr_to_read);
@@ -2492,11 +2533,15 @@
int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t);
int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
-int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
+ unsigned long num);
+int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
+ unsigned long num);
+vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn);
-int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, pgprot_t pgprot);
-int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
pfn_t pfn);
vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
unsigned long addr, pfn_t pfn);
@@ -2515,32 +2560,6 @@
return VM_FAULT_NOPAGE;
}
-static inline vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma,
- unsigned long addr, pfn_t pfn)
-{
- int err = vm_insert_mixed(vma, addr, pfn);
-
- if (err == -ENOMEM)
- return VM_FAULT_OOM;
- if (err < 0 && err != -EBUSY)
- return VM_FAULT_SIGBUS;
-
- return VM_FAULT_NOPAGE;
-}
-
-static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma,
- unsigned long addr, unsigned long pfn)
-{
- int err = vm_insert_pfn(vma, addr, pfn);
-
- if (err == -ENOMEM)
- return VM_FAULT_OOM;
- if (err < 0 && err != -EBUSY)
- return VM_FAULT_SIGBUS;
-
- return VM_FAULT_NOPAGE;
-}
-
static inline vm_fault_t vmf_error(int err)
{
if (err == -ENOMEM)
@@ -2548,16 +2567,8 @@
return VM_FAULT_SIGBUS;
}
-struct page *follow_page_mask(struct vm_area_struct *vma,
- unsigned long address, unsigned int foll_flags,
- unsigned int *page_mask);
-
-static inline struct page *follow_page(struct vm_area_struct *vma,
- unsigned long address, unsigned int foll_flags)
-{
- unsigned int unused_page_mask;
- return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
-}
+struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+ unsigned int foll_flags);
#define FOLL_WRITE 0x01 /* check pte is writable */
#define FOLL_TOUCH 0x02 /* mark page accessed */
@@ -2576,6 +2587,35 @@
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
#define FOLL_COW 0x4000 /* internal GUP flag */
#define FOLL_ANON 0x8000 /* don't do file mappings */
+#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
+#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
+
+/*
+ * NOTE on FOLL_LONGTERM:
+ *
+ * FOLL_LONGTERM indicates that the page will be held for an indefinite time
+ * period _often_ under userspace control. This is contrasted with
+ * iov_iter_get_pages() where usages which are transient.
+ *
+ * FIXME: For pages which are part of a filesystem, mappings are subject to the
+ * lifetime enforced by the filesystem and we need guarantees that longterm
+ * users like RDMA and V4L2 only establish mappings which coordinate usage with
+ * the filesystem. Ideas for this coordination include revoking the longterm
+ * pin, delaying writeback, bounce buffer page writeback, etc. As FS DAX was
+ * added after the problem with filesystems was found FS DAX VMAs are
+ * specifically failed. Filesystem pages are still subject to bugs and use of
+ * FOLL_LONGTERM should be avoided on those pages.
+ *
+ * FIXME: Also NOTE that FOLL_LONGTERM is not supported in every GUP call.
+ * Currently only get_user_pages() and get_user_pages_fast() support this flag
+ * and calls to get_user_pages_[un]locked are specifically not allowed. This
+ * is due to an incompatibility with the FS DAX check and
+ * FAULT_FLAG_ALLOW_RETRY
+ *
+ * In the CMA case: longterm pins in a CMA region would unnecessarily fragment
+ * that region. And so CMA attempts to migrate the page before pinning when
+ * FOLL_LONGTERM is specified.
+ */
static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
{
@@ -2588,8 +2628,7 @@
return 0;
}
-typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
- void *data);
+typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data);
@@ -2603,37 +2642,62 @@
int enable) { }
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
-extern bool _debug_pagealloc_enabled;
-extern void __kernel_map_pages(struct page *page, int numpages, int enable);
+#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON
+DECLARE_STATIC_KEY_TRUE(init_on_alloc);
+#else
+DECLARE_STATIC_KEY_FALSE(init_on_alloc);
+#endif
+static inline bool want_init_on_alloc(gfp_t flags)
+{
+ if (static_branch_unlikely(&init_on_alloc) &&
+ !page_poisoning_enabled())
+ return true;
+ return flags & __GFP_ZERO;
+}
+
+#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON
+DECLARE_STATIC_KEY_TRUE(init_on_free);
+#else
+DECLARE_STATIC_KEY_FALSE(init_on_free);
+#endif
+static inline bool want_init_on_free(void)
+{
+ return static_branch_unlikely(&init_on_free) &&
+ !page_poisoning_enabled();
+}
+
+#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT
+DECLARE_STATIC_KEY_TRUE(_debug_pagealloc_enabled);
+#else
+DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
+#endif
static inline bool debug_pagealloc_enabled(void)
{
- return _debug_pagealloc_enabled;
+ if (!IS_ENABLED(CONFIG_DEBUG_PAGEALLOC))
+ return false;
+
+ return static_branch_unlikely(&_debug_pagealloc_enabled);
}
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP)
+extern void __kernel_map_pages(struct page *page, int numpages, int enable);
+
static inline void
kernel_map_pages(struct page *page, int numpages, int enable)
{
- if (!debug_pagealloc_enabled())
- return;
-
__kernel_map_pages(page, numpages, enable);
}
#ifdef CONFIG_HIBERNATION
extern bool kernel_page_present(struct page *page);
#endif /* CONFIG_HIBERNATION */
-#else /* CONFIG_DEBUG_PAGEALLOC */
+#else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */
static inline void
kernel_map_pages(struct page *page, int numpages, int enable) {}
#ifdef CONFIG_HIBERNATION
static inline bool kernel_page_present(struct page *page) { return true; }
#endif /* CONFIG_HIBERNATION */
-static inline bool debug_pagealloc_enabled(void)
-{
- return false;
-}
-#endif /* CONFIG_DEBUG_PAGEALLOC */
+#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */
#ifdef __HAVE_ARCH_GATE_AREA
extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
@@ -2669,11 +2733,17 @@
#endif
const char * arch_vma_name(struct vm_area_struct *vma);
+#ifdef CONFIG_MMU
void print_vma_addr(char *prefix, unsigned long rip);
+#else
+static inline void print_vma_addr(char *prefix, unsigned long rip)
+{
+}
+#endif
void *sparse_buffer_alloc(unsigned long size);
-struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
- struct vmem_altmap *altmap);
+struct page * __populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid, struct vmem_altmap *altmap);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
@@ -2763,11 +2833,9 @@
bool allow_pagefault);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
-extern struct page_ext_operations debug_guardpage_ops;
-
#ifdef CONFIG_DEBUG_PAGEALLOC
extern unsigned int _debug_guardpage_minorder;
-extern bool _debug_guardpage_enabled;
+DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
static inline unsigned int debug_guardpage_minorder(void)
{
@@ -2776,21 +2844,15 @@
static inline bool debug_guardpage_enabled(void)
{
- return _debug_guardpage_enabled;
+ return static_branch_unlikely(&_debug_guardpage_enabled);
}
static inline bool page_is_guard(struct page *page)
{
- struct page_ext *page_ext;
-
if (!debug_guardpage_enabled())
return false;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
- return false;
-
- return test_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
+ return PageGuard(page);
}
#else
static inline unsigned int debug_guardpage_minorder(void) { return 0; }
@@ -2804,5 +2866,12 @@
static inline void setup_nr_node_ids(void) {}
#endif
+extern int memcmp_pages(struct page *page1, struct page *page2);
+
+static inline int pages_identical(struct page *page1, struct page *page2)
+{
+ return !memcmp_pages(page1, page2);
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */