Blame - include/linux/pagemap.h - hafnium/third_party/linux.git

blob: fcb3f040102af2d5c71afacb31f8f8140260ae25 [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1	/* SPDX-License-Identifier: GPL-2.0 */
				2	#ifndef _LINUX_PAGEMAP_H
				3	#define _LINUX_PAGEMAP_H
				4
				5	/*
				6	* Copyright 1995 Linus Torvalds
				7	*/
				8	#include <linux/mm.h>
				9	#include <linux/fs.h>
				10	#include <linux/list.h>
				11	#include <linux/highmem.h>
				12	#include <linux/compiler.h>
				13	#include <linux/uaccess.h>
				14	#include <linux/gfp.h>
				15	#include <linux/bitops.h>
				16	#include <linux/hardirq.h> /* for in_interrupt() */
				17	#include <linux/hugetlb_inline.h>
				18
				19	struct pagevec;
				20
				21	/*
				22	* Bits in mapping->flags.
				23	*/
				24	enum mapping_flags {
				25	AS_EIO = 0, /* IO error on async write */
				26	AS_ENOSPC = 1, /* ENOSPC on async write */
				27	AS_MM_ALL_LOCKS = 2, /* under mm_take_all_locks() */
				28	AS_UNEVICTABLE = 3, /* e.g., ramdisk, SHM_LOCK */
				29	AS_EXITING = 4, /* final truncate in progress */
				30	/* writeback related tags are not used */
				31	AS_NO_WRITEBACK_TAGS = 5,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	32	AS_THP_SUPPORT = 6, /* THPs supported */
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	33	};
				34
				35	/**
				36	* mapping_set_error - record a writeback error in the address_space
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	37	* @mapping: the mapping in which an error should be set
				38	* @error: the error to set in the mapping
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	39	*
				40	* When writeback fails in some way, we must record that error so that
				41	* userspace can be informed when fsync and the like are called. We endeavor
				42	* to report errors on any file that was open at the time of the error. Some
				43	* internal callers also need to know when writeback errors have occurred.
				44	*
				45	* When a writeback error occurs, most filesystems will want to call
				46	* mapping_set_error to record the error in the mapping so that it can be
				47	* reported when the application calls fsync(2).
				48	*/
				49	static inline void mapping_set_error(struct address_space *mapping, int error)
				50	{
				51	if (likely(!error))
				52	return;
				53
				54	/* Record in wb_err for checkers using errseq_t based tracking */
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	55	__filemap_set_wb_err(mapping, error);
				56
				57	/* Record it in superblock */
				58	if (mapping->host)
				59	errseq_set(&mapping->host->i_sb->s_wb_err, error);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	60
				61	/* Record it in flags for now, for legacy callers */
				62	if (error == -ENOSPC)
				63	set_bit(AS_ENOSPC, &mapping->flags);
				64	else
				65	set_bit(AS_EIO, &mapping->flags);
				66	}
				67
				68	static inline void mapping_set_unevictable(struct address_space *mapping)
				69	{
				70	set_bit(AS_UNEVICTABLE, &mapping->flags);
				71	}
				72
				73	static inline void mapping_clear_unevictable(struct address_space *mapping)
				74	{
				75	clear_bit(AS_UNEVICTABLE, &mapping->flags);
				76	}
				77
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	78	static inline bool mapping_unevictable(struct address_space *mapping)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	79	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	80	return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	81	}
				82
				83	static inline void mapping_set_exiting(struct address_space *mapping)
				84	{
				85	set_bit(AS_EXITING, &mapping->flags);
				86	}
				87
				88	static inline int mapping_exiting(struct address_space *mapping)
				89	{
				90	return test_bit(AS_EXITING, &mapping->flags);
				91	}
				92
				93	static inline void mapping_set_no_writeback_tags(struct address_space *mapping)
				94	{
				95	set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
				96	}
				97
				98	static inline int mapping_use_writeback_tags(struct address_space *mapping)
				99	{
				100	return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
				101	}
				102
				103	static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
				104	{
				105	return mapping->gfp_mask;
				106	}
				107
				108	/* Restricts the given gfp_mask to what the mapping allows. */
				109	static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
				110	gfp_t gfp_mask)
				111	{
				112	return mapping_gfp_mask(mapping) & gfp_mask;
				113	}
				114
				115	/*
				116	* This is non-atomic. Only to be used before the mapping is activated.
				117	* Probably needs a barrier...
				118	*/
				119	static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
				120	{
				121	m->gfp_mask = mask;
				122	}
				123
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	124	static inline bool mapping_thp_support(struct address_space *mapping)
				125	{
				126	return test_bit(AS_THP_SUPPORT, &mapping->flags);
				127	}
				128
				129	static inline int filemap_nr_thps(struct address_space *mapping)
				130	{
				131	#ifdef CONFIG_READ_ONLY_THP_FOR_FS
				132	return atomic_read(&mapping->nr_thps);
				133	#else
				134	return 0;
				135	#endif
				136	}
				137
				138	static inline void filemap_nr_thps_inc(struct address_space *mapping)
				139	{
				140	#ifdef CONFIG_READ_ONLY_THP_FOR_FS
				141	if (!mapping_thp_support(mapping))
				142	atomic_inc(&mapping->nr_thps);
				143	#else
				144	WARN_ON_ONCE(1);
				145	#endif
				146	}
				147
				148	static inline void filemap_nr_thps_dec(struct address_space *mapping)
				149	{
				150	#ifdef CONFIG_READ_ONLY_THP_FOR_FS
				151	if (!mapping_thp_support(mapping))
				152	atomic_dec(&mapping->nr_thps);
				153	#else
				154	WARN_ON_ONCE(1);
				155	#endif
				156	}
				157
/* Defined outside this header; takes an array of @nr page pointers. */
void release_pages(struct page **pages, int nr);
				159
				160	/*
				161	* speculatively take a reference to a page.
				162	* If the page is free (_refcount == 0), then _refcount is untouched, and 0
				163	* is returned. Otherwise, _refcount is incremented by 1 and 1 is returned.
				164	*
				165	* This function must be called inside the same rcu_read_lock() section as has
				166	* been used to lookup the page in the pagecache radix-tree (or page table):
				167	* this allows allocators to use a synchronize_rcu() to stabilize _refcount.
				168	*
				169	* Unless an RCU grace period has passed, the count of all pages coming out
				170	* of the allocator must be considered unstable. page_count may return higher
				171	* than expected, and put_page must be able to do the right thing when the
				172	* page has been finished with, no matter what it is subsequently allocated
				173	* for (because put_page is what is used here to drop an invalid speculative
				174	* reference).
				175	*
				176	* This is the interesting part of the lockless pagecache (and lockless
				177	* get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
				178	* has the following pattern:
				179	* 1. find page in radix tree
				180	* 2. conditionally increment refcount
				181	* 3. check the page is still in pagecache (if no, goto 1)
				182	*
				183	* Remove-side that cares about stability of _refcount (eg. reclaim) has the
				184	* following (with the i_pages lock held):
				185	* A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
				186	* B. remove page from pagecache
				187	* C. free the page
				188	*
				189	* There are 2 critical interleavings that matter:
				190	* - 2 runs before A: in this case, A sees elevated refcount and bails out
				191	* - A runs before 2: in this case, 2 sees zero refcount and retries;
				192	* subsequently, B will complete and 1 will find no page, causing the
				193	* lookup to return NULL.
				194	*
				195	* It is possible that between 1 and 2, the page is removed then the exact same
				196	* page is inserted into the same position in pagecache. That's OK: the
				197	* old find_get_page using a lock could equally have run before or after
				198	* such a re-insertion, depending on order that locks are granted.
				199	*
				200	* Lookups racing against pagecache insertion isn't a big problem: either 1
				201	* will find the page or it will not. Likewise, the old find_get_page could run
				202	* either before the insertion or afterwards, depending on timing.
				203	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	204	static inline int __page_cache_add_speculative(struct page *page, int count)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	205	{
				206	#ifdef CONFIG_TINY_RCU
				207	# ifdef CONFIG_PREEMPT_COUNT
				208	VM_BUG_ON(!in_atomic() && !irqs_disabled());
				209	# endif
				210	/*
				211	* Preempt must be disabled here - we rely on rcu_read_lock doing
				212	* this for us.
				213	*
				214	* Pagecache won't be truncated from interrupt context, so if we have
				215	* found a page in the radix tree here, we have pinned its refcount by
				216	* disabling preempt, and hence no need for the "speculative get" that
				217	* SMP requires.
				218	*/
				219	VM_BUG_ON_PAGE(page_count(page) == 0, page);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	220	page_ref_add(page, count);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	221
				222	#else
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	223	if (unlikely(!page_ref_add_unless(page, count, 0))) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	224	/*
				225	* Either the page has been freed, or will be freed.
				226	* In either case, retry here and the caller should
				227	* do the right thing (see comments above).
				228	*/
				229	return 0;
				230	}
				231	#endif
				232	VM_BUG_ON_PAGE(PageTail(page), page);
				233
				234	return 1;
				235	}
				236
/*
 * Speculatively take a single reference on @page.  Returns whatever
 * __page_cache_add_speculative() returns for a count of 1.
 */
static inline int page_cache_get_speculative(struct page *page)
{
	return __page_cache_add_speculative(page, 1);
}
				241
/*
 * Speculatively take @count references on @page.  Thin wrapper that
 * forwards to __page_cache_add_speculative().
 */
static inline int page_cache_add_speculative(struct page *page, int count)
{
	return __page_cache_add_speculative(page, count);
}
				246
/**
 * attach_page_private - Attach private data to a page.
 * @page: Page to attach data to.
 * @data: Data to attach to page.
 *
 * Attaching private data to a page increments the page's reference count.
 * The data must be detached before the page will be freed.
 */
static inline void attach_page_private(struct page *page, void *data)
{
	get_page(page);
	/* The pointer is stored in the page's private word as an integer. */
	set_page_private(page, (unsigned long)data);
	SetPagePrivate(page);
}
				261
				262	/**
				263	* detach_page_private - Detach private data from a page.
				264	* @page: Page to detach data from.
				265	*
				266	* Removes the data that was previously attached to the page and decrements
				267	* the refcount on the page.
				268	*
				269	* Return: Data that was attached to the page.
				270	*/
				271	static inline void detach_page_private(struct page page)
				272	{
				273	void data = (void )page_private(page);
				274
				275	if (!PagePrivate(page))
				276	return NULL;
				277	ClearPagePrivate(page);
				278	set_page_private(page, 0);
				279	put_page(page);
				280
				281	return data;
				282	}
				283
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	284	#ifdef CONFIG_NUMA
				285	extern struct page *__page_cache_alloc(gfp_t gfp);
				286	#else
				287	static inline struct page *__page_cache_alloc(gfp_t gfp)
				288	{
				289	return alloc_pages(gfp, 0);
				290	}
				291	#endif
				292
				293	static inline struct page page_cache_alloc(struct address_space x)
				294	{
				295	return __page_cache_alloc(mapping_gfp_mask(x));
				296	}
				297
				298	static inline gfp_t readahead_gfp_mask(struct address_space *x)
				299	{
				300	return mapping_gfp_mask(x) \| __GFP_NORETRY \| __GFP_NOWARN;
				301	}
				302
				303	typedef int filler_t(void , struct page );
				304
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	305	pgoff_t page_cache_next_miss(struct address_space *mapping,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	306	pgoff_t index, unsigned long max_scan);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	307	pgoff_t page_cache_prev_miss(struct address_space *mapping,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	308	pgoff_t index, unsigned long max_scan);
				309
				310	#define FGP_ACCESSED 0x00000001
				311	#define FGP_LOCK 0x00000002
				312	#define FGP_CREAT 0x00000004
				313	#define FGP_WRITE 0x00000008
				314	#define FGP_NOFS 0x00000010
				315	#define FGP_NOWAIT 0x00000020
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	316	#define FGP_FOR_MMAP 0x00000040
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	317	#define FGP_HEAD 0x00000080
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	318
				319	struct page pagecache_get_page(struct address_space mapping, pgoff_t offset,
				320	int fgp_flags, gfp_t cache_gfp_mask);
				321
				322	/**
				323	* find_get_page - find and get a page reference
				324	* @mapping: the address_space to search
				325	* @offset: the page index
				326	*
				327	* Looks up the page cache slot at @mapping & @offset. If there is a
				328	* page cache page, it is returned with an increased refcount.
				329	*
				330	* Otherwise, %NULL is returned.
				331	*/
				332	static inline struct page find_get_page(struct address_space mapping,
				333	pgoff_t offset)
				334	{
				335	return pagecache_get_page(mapping, offset, 0, 0);
				336	}
				337
				338	static inline struct page find_get_page_flags(struct address_space mapping,
				339	pgoff_t offset, int fgp_flags)
				340	{
				341	return pagecache_get_page(mapping, offset, fgp_flags, 0);
				342	}
				343
				344	/**
				345	* find_lock_page - locate, pin and lock a pagecache page
				346	* @mapping: the address_space to search
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	347	* @index: the page index
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	348	*
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	349	* Looks up the page cache entry at @mapping & @index. If there is a
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	350	* page cache page, it is returned locked and with an increased
				351	* refcount.
				352	*
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	353	* Context: May sleep.
				354	* Return: A struct page or %NULL if there is no page in the cache for this
				355	* index.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	356	*/
				357	static inline struct page find_lock_page(struct address_space mapping,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	358	pgoff_t index)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	359	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	360	return pagecache_get_page(mapping, index, FGP_LOCK, 0);
				361	}
				362
				363	/**
				364	* find_lock_head - Locate, pin and lock a pagecache page.
				365	* @mapping: The address_space to search.
				366	* @index: The page index.
				367	*
				368	* Looks up the page cache entry at @mapping & @index. If there is a
				369	* page cache page, its head page is returned locked and with an increased
				370	* refcount.
				371	*
				372	* Context: May sleep.
				373	* Return: A struct page which is !PageTail, or %NULL if there is no page
				374	* in the cache for this index.
				375	*/
				376	static inline struct page find_lock_head(struct address_space mapping,
				377	pgoff_t index)
				378	{
				379	return pagecache_get_page(mapping, index, FGP_LOCK \| FGP_HEAD, 0);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	380	}
				381
				382	/**
				383	* find_or_create_page - locate or add a pagecache page
				384	* @mapping: the page's address_space
				385	* @index: the page's index into the mapping
				386	* @gfp_mask: page allocation mode
				387	*
				388	* Looks up the page cache slot at @mapping & @offset. If there is a
				389	* page cache page, it is returned locked and with an increased
				390	* refcount.
				391	*
				392	* If the page is not present, a new page is allocated using @gfp_mask
				393	* and added to the page cache and the VM's LRU list. The page is
				394	* returned locked and with an increased refcount.
				395	*
				396	* On memory exhaustion, %NULL is returned.
				397	*
				398	* find_or_create_page() may sleep, even if @gfp_flags specifies an
				399	* atomic allocation!
				400	*/
				401	static inline struct page find_or_create_page(struct address_space mapping,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	402	pgoff_t index, gfp_t gfp_mask)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	403	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	404	return pagecache_get_page(mapping, index,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	405	FGP_LOCK\|FGP_ACCESSED\|FGP_CREAT,
				406	gfp_mask);
				407	}
				408
				409	/**
				410	* grab_cache_page_nowait - returns locked page at given index in given cache
				411	* @mapping: target address_space
				412	* @index: the page index
				413	*
				414	* Same as grab_cache_page(), but do not wait if the page is unavailable.
				415	* This is intended for speculative data generators, where the data can
				416	* be regenerated if the page couldn't be grabbed. This routine should
				417	* be safe to call while holding the lock for another page.
				418	*
				419	* Clear __GFP_FS when allocating the page to avoid recursion into the fs
				420	* and deadlock against the caller's locked page.
				421	*/
				422	static inline struct page grab_cache_page_nowait(struct address_space mapping,
				423	pgoff_t index)
				424	{
				425	return pagecache_get_page(mapping, index,
				426	FGP_LOCK\|FGP_CREAT\|FGP_NOFS\|FGP_NOWAIT,
				427	mapping_gfp_mask(mapping));
				428	}
				429
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	430	/* Does this page contain this index? */
				431	static inline bool thp_contains(struct page *head, pgoff_t index)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	432	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	433	/* HugeTLBfs indexes the page cache in units of hpage_size */
				434	if (PageHuge(head))
				435	return head->index == index;
				436	return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL));
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	437	}
				438
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	439	/*
				440	* Given the page we found in the page cache, return the page corresponding
				441	* to this index in the file
				442	*/
				443	static inline struct page find_subpage(struct page head, pgoff_t index)
				444	{
				445	/* HugeTLBfs wants the head page regardless */
				446	if (PageHuge(head))
				447	return head;
				448
				449	return head + (index & (thp_nr_pages(head) - 1));
				450	}
				451
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	452	unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
				453	unsigned int nr_entries, struct page **entries,
				454	pgoff_t *indices);
				455	unsigned find_get_pages_range(struct address_space mapping, pgoff_t start,
				456	pgoff_t end, unsigned int nr_pages,
				457	struct page **pages);
				458	static inline unsigned find_get_pages(struct address_space *mapping,
				459	pgoff_t *start, unsigned int nr_pages,
				460	struct page **pages)
				461	{
				462	return find_get_pages_range(mapping, start, (pgoff_t)-1, nr_pages,
				463	pages);
				464	}
				465	unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
				466	unsigned int nr_pages, struct page **pages);
				467	unsigned find_get_pages_range_tag(struct address_space mapping, pgoff_t index,
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	468	pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	469	struct page **pages);
				470	static inline unsigned find_get_pages_tag(struct address_space *mapping,
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	471	pgoff_t *index, xa_mark_t tag, unsigned int nr_pages,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	472	struct page **pages)
				473	{
				474	return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag,
				475	nr_pages, pages);
				476	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	477
				478	struct page grab_cache_page_write_begin(struct address_space mapping,
				479	pgoff_t index, unsigned flags);
				480
				481	/*
				482	* Returns locked page at given index in given cache, creating it if needed.
				483	*/
				484	static inline struct page grab_cache_page(struct address_space mapping,
				485	pgoff_t index)
				486	{
				487	return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
				488	}
				489
				490	extern struct page * read_cache_page(struct address_space *mapping,
				491	pgoff_t index, filler_t filler, void data);
				492	extern struct page * read_cache_page_gfp(struct address_space *mapping,
				493	pgoff_t index, gfp_t gfp_mask);
				494	extern int read_cache_pages(struct address_space *mapping,
				495	struct list_head pages, filler_t filler, void *data);
				496
				497	static inline struct page read_mapping_page(struct address_space mapping,
				498	pgoff_t index, void *data)
				499	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	500	return read_cache_page(mapping, index, NULL, data);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	501	}
				502
				503	/*
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	504	* Get index of the page within radix-tree (but not for hugetlb pages).
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	505	* (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
				506	*/
				507	static inline pgoff_t page_to_index(struct page *page)
				508	{
				509	pgoff_t pgoff;
				510
				511	if (likely(!PageTransTail(page)))
				512	return page->index;
				513
				514	/*
				515	* We don't initialize ->index for tail pages: calculate based on
				516	* head page
				517	*/
				518	pgoff = compound_head(page)->index;
				519	pgoff += page - compound_head(page);
				520	return pgoff;
				521	}
				522
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	523	extern pgoff_t hugetlb_basepage_index(struct page *page);
				524
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	525	/*
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	526	* Get the offset in PAGE_SIZE (even for hugetlb pages).
				527	* (TODO: hugetlb pages should have ->index in PAGE_SIZE)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	528	*/
				529	static inline pgoff_t page_to_pgoff(struct page *page)
				530	{
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	531	if (unlikely(PageHuge(page)))
				532	return hugetlb_basepage_index(page);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	533	return page_to_index(page);
				534	}
				535
				536	/*
				537	* Return byte-offset into filesystem object for page.
				538	*/
				539	static inline loff_t page_offset(struct page *page)
				540	{
				541	return ((loff_t)page->index) << PAGE_SHIFT;
				542	}
				543
				544	static inline loff_t page_file_offset(struct page *page)
				545	{
				546	return ((loff_t)page_index(page)) << PAGE_SHIFT;
				547	}
				548
				549	extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
				550	unsigned long address);
				551
				552	static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
				553	unsigned long address)
				554	{
				555	pgoff_t pgoff;
				556	if (unlikely(is_vm_hugetlb_page(vma)))
				557	return linear_hugepage_index(vma, address);
				558	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
				559	pgoff += vma->vm_pgoff;
				560	return pgoff;
				561	}
				562
/*
 * Key compared against waiters by wake_page_match(): the page and bit
 * number of interest.  @page_match is set to 1 by wake_page_match() when
 * a waiter for the same page is seen.
 */
struct wait_page_key {
	struct page *page;
	int bit_nr;
	int page_match;
};
				568
				569	struct wait_page_queue {
				570	struct page *page;
				571	int bit_nr;
				572	wait_queue_entry_t wait;
				573	};
				574
				575	static inline bool wake_page_match(struct wait_page_queue *wait_page,
				576	struct wait_page_key *key)
				577	{
				578	if (wait_page->page != key->page)
				579	return false;
				580	key->page_match = 1;
				581
				582	if (wait_page->bit_nr != key->bit_nr)
				583	return false;
				584
				585	return true;
				586	}
				587
/*
 * Page lock slow paths and unlock, defined outside this header.  The
 * inline lock_page*() helpers call the __lock_page*() variants when
 * trylock_page() fails.
 */
extern void __lock_page(struct page *page);
extern int __lock_page_killable(struct page *page);
extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
				unsigned int flags);
extern void unlock_page(struct page *page);
				594
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	595	/*
				596	* Return true if the page was successfully locked
				597	*/
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	598	static inline int trylock_page(struct page *page)
				599	{
				600	page = compound_head(page);
				601	return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
				602	}
				603
				604	/*
				605	* lock_page may only be called if we have the page's inode pinned.
				606	*/
				607	static inline void lock_page(struct page *page)
				608	{
				609	might_sleep();
				610	if (!trylock_page(page))
				611	__lock_page(page);
				612	}
				613
				614	/*
				615	* lock_page_killable is like lock_page but can be interrupted by fatal
				616	* signals. It returns 0 if it locked the page and -EINTR if it was
				617	* killed while waiting.
				618	*/
				619	static inline int lock_page_killable(struct page *page)
				620	{
				621	might_sleep();
				622	if (!trylock_page(page))
				623	return __lock_page_killable(page);
				624	return 0;
				625	}
				626
				627	/*
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	628	* lock_page_async - Lock the page, unless this would block. If the page
				629	* is already locked, then queue a callback when the page becomes unlocked.
				630	* This callback can then retry the operation.
				631	*
				632	* Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page
				633	* was already locked and the callback defined in 'wait' was queued.
				634	*/
				635	static inline int lock_page_async(struct page *page,
				636	struct wait_page_queue *wait)
				637	{
				638	if (!trylock_page(page))
				639	return __lock_page_async(page, wait);
				640	return 0;
				641	}
				642
				643	/*
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	644	* lock_page_or_retry - Lock the page, unless this would block and the
				645	* caller indicated that it can handle a retry.
				646	*
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	647	* Return value and mmap_lock implications depend on flags; see
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	648	* __lock_page_or_retry().
				649	*/
				650	static inline int lock_page_or_retry(struct page page, struct mm_struct mm,
				651	unsigned int flags)
				652	{
				653	might_sleep();
				654	return trylock_page(page) \|\| __lock_page_or_retry(page, mm, flags);
				655	}
				656
				657	/*
				658	* This is exported only for wait_on_page_locked/wait_on_page_writeback, etc.,
				659	* and should not be used directly.
				660	*/
				661	extern void wait_on_page_bit(struct page *page, int bit_nr);
				662	extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
				663
				664	/*
				665	* Wait for a page to be unlocked.
				666	*
				667	* This must be called with the caller "holding" the page,
				668	* ie with increased "page->count" so that the page won't
				669	* go away during the wait..
				670	*/
				671	static inline void wait_on_page_locked(struct page *page)
				672	{
				673	if (PageLocked(page))
				674	wait_on_page_bit(compound_head(page), PG_locked);
				675	}
				676
				677	static inline int wait_on_page_locked_killable(struct page *page)
				678	{
				679	if (!PageLocked(page))
				680	return 0;
				681	return wait_on_page_bit_killable(compound_head(page), PG_locked);
				682	}
				683
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	684	extern void put_and_wait_on_page_locked(struct page *page);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	685
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	686	void wait_on_page_writeback(struct page *page);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	687	extern void end_page_writeback(struct page *page);
				688	void wait_for_stable_page(struct page *page);
				689
				690	void page_endio(struct page *page, bool is_write, int err);
				691
				692	/*
				693	* Add an arbitrary waiter to a page's wait queue
				694	*/
				695	extern void add_page_wait_queue(struct page page, wait_queue_entry_t waiter);
				696
				697	/*
				698	* Fault everything in given userspace address range in.
				699	*/
				700	static inline int fault_in_pages_writeable(char __user *uaddr, int size)
				701	{
				702	char __user *end = uaddr + size - 1;
				703
				704	if (unlikely(size == 0))
				705	return 0;
				706
				707	if (unlikely(uaddr > end))
				708	return -EFAULT;
				709	/*
				710	* Writing zeroes into userspace here is OK, because we know that if
				711	* the zero gets there, we'll be overwriting it.
				712	*/
				713	do {
				714	if (unlikely(__put_user(0, uaddr) != 0))
				715	return -EFAULT;
				716	uaddr += PAGE_SIZE;
				717	} while (uaddr <= end);
				718
				719	/* Check whether the range spilled into the next page. */
				720	if (((unsigned long)uaddr & PAGE_MASK) ==
				721	((unsigned long)end & PAGE_MASK))
				722	return __put_user(0, end);
				723
				724	return 0;
				725	}
				726
				727	static inline int fault_in_pages_readable(const char __user *uaddr, int size)
				728	{
				729	volatile char c;
				730	const char __user *end = uaddr + size - 1;
				731
				732	if (unlikely(size == 0))
				733	return 0;
				734
				735	if (unlikely(uaddr > end))
				736	return -EFAULT;
				737
				738	do {
				739	if (unlikely(__get_user(c, uaddr) != 0))
				740	return -EFAULT;
				741	uaddr += PAGE_SIZE;
				742	} while (uaddr <= end);
				743
				744	/* Check whether the range spilled into the next page. */
				745	if (((unsigned long)uaddr & PAGE_MASK) ==
				746	((unsigned long)end & PAGE_MASK)) {
				747	return __get_user(c, end);
				748	}
				749
				750	(void)c;
				751	return 0;
				752	}
				753
				754	int add_to_page_cache_locked(struct page page, struct address_space mapping,
				755	pgoff_t index, gfp_t gfp_mask);
				756	int add_to_page_cache_lru(struct page page, struct address_space mapping,
				757	pgoff_t index, gfp_t gfp_mask);
				758	extern void delete_from_page_cache(struct page *page);
				759	extern void __delete_from_page_cache(struct page page, void shadow);
				760	int replace_page_cache_page(struct page old, struct page new, gfp_t gfp_mask);
				761	void delete_from_page_cache_batch(struct address_space *mapping,
				762	struct pagevec *pvec);
				763
				764	/*
				765	* Like add_to_page_cache_locked, but used to add newly allocated pages:
				766	* the page is new, so we can just run __SetPageLocked() against it.
				767	*/
				768	static inline int add_to_page_cache(struct page *page,
				769	struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
				770	{
				771	int error;
				772
				773	__SetPageLocked(page);
				774	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
				775	if (unlikely(error))
				776	__ClearPageLocked(page);
				777	return error;
				778	}
				779
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	780	/**
				781	* struct readahead_control - Describes a readahead request.
				782	*
				783	* A readahead request is for consecutive pages. Filesystems which
				784	* implement the ->readahead method should call readahead_page() or
				785	* readahead_page_batch() in a loop and attempt to start I/O against
				786	* each page in the request.
				787	*
				788	* Most of the fields in this struct are private and should be accessed
				789	* by the functions below.
				790	*
				791	* @file: The file, used primarily by network filesystems for authentication.
				792	* May be NULL if invoked internally by the filesystem.
				793	* @mapping: Readahead this filesystem object.
				794	*/
				795	struct readahead_control {
				796	struct file *file;
				797	struct address_space *mapping;
				798	/* private: use the readahead_* accessors instead */
				799	pgoff_t _index;
				800	unsigned int _nr_pages;
				801	unsigned int _batch_count;
				802	};
				803
				/*
				 * Declare a struct readahead_control named @rac with its file, mapping
				 * and starting index set from @f, @m and @i.  Members not named in the
				 * initialiser are zero-initialised.
				 */
				#define DEFINE_READAHEAD(rac, f, m, i)					\
					struct readahead_control rac = {				\
						.file = (f),						\
						.mapping = (m),						\
						._index = (i),						\
					}
				810
				/* Number of PAGE_SIZE pages contained in SZ_128K bytes. */
				#define VM_READAHEAD_PAGES	(SZ_128K / PAGE_SIZE)

				/*
				 * Readahead entry points.  These are declarations only; the definitions
				 * are not part of this header.  The readahead control and the
				 * file_ra_state are passed by pointer.
				 */
				void page_cache_ra_unbounded(struct readahead_control *ractl,
						unsigned long nr_to_read, unsigned long lookahead_count);
				void page_cache_sync_ra(struct readahead_control *ractl,
						struct file_ra_state *ra, unsigned long req_count);
				void page_cache_async_ra(struct readahead_control *ractl,
						struct file_ra_state *ra, struct page *page,
						unsigned long req_count);
				819
				820	/**
				821	* page_cache_sync_readahead - generic file readahead
				822	* @mapping: address_space which holds the pagecache and I/O vectors
				823	* @ra: file_ra_state which holds the readahead state
				824	* @file: Used by the filesystem for authentication.
				825	* @index: Index of first page to be read.
				826	* @req_count: Total number of pages being read by the caller.
				827	*
				828	* page_cache_sync_readahead() should be called when a cache miss happened:
				829	* it will submit the read. The readahead logic may decide to piggyback more
				830	* pages onto the read request if access patterns suggest it will improve
				831	* performance.
				832	*/
				833	static inline
				834	void page_cache_sync_readahead(struct address_space *mapping,
				835	struct file_ra_state ra, struct file file, pgoff_t index,
				836	unsigned long req_count)
				837	{
				838	DEFINE_READAHEAD(ractl, file, mapping, index);
				839	page_cache_sync_ra(&ractl, ra, req_count);
				840	}
				841
				842	/**
				843	* page_cache_async_readahead - file readahead for marked pages
				844	* @mapping: address_space which holds the pagecache and I/O vectors
				845	* @ra: file_ra_state which holds the readahead state
				846	* @file: Used by the filesystem for authentication.
				847	* @page: The page at @index which triggered the readahead call.
				848	* @index: Index of first page to be read.
				849	* @req_count: Total number of pages being read by the caller.
				850	*
				851	* page_cache_async_readahead() should be called when a page is used which
				852	* is marked as PageReadahead; this is a marker to suggest that the application
				853	* has used up enough of the readahead window that we should start pulling in
				854	* more pages.
				855	*/
				856	static inline
				857	void page_cache_async_readahead(struct address_space *mapping,
				858	struct file_ra_state ra, struct file file,
				859	struct page *page, pgoff_t index, unsigned long req_count)
				860	{
				861	DEFINE_READAHEAD(ractl, file, mapping, index);
				862	page_cache_async_ra(&ractl, ra, page, req_count);
				863	}
				864
				865	/**
				866	* readahead_page - Get the next page to read.
				867	* @rac: The current readahead request.
				868	*
				869	* Context: The page is locked and has an elevated refcount. The caller
				870	* should decreases the refcount once the page has been submitted for I/O
				871	* and unlock the page once all I/O to that page has completed.
				872	* Return: A pointer to the next page, or %NULL if we are done.
				873	*/
				874	static inline struct page readahead_page(struct readahead_control rac)
				875	{
				876	struct page *page;
				877
				878	BUG_ON(rac->_batch_count > rac->_nr_pages);
				879	rac->_nr_pages -= rac->_batch_count;
				880	rac->_index += rac->_batch_count;
				881
				882	if (!rac->_nr_pages) {
				883	rac->_batch_count = 0;
				884	return NULL;
				885	}
				886
				887	page = xa_load(&rac->mapping->i_pages, rac->_index);
				888	VM_BUG_ON_PAGE(!PageLocked(page), page);
				889	rac->_batch_count = thp_nr_pages(page);
				890
				891	return page;
				892	}
				893
				894	static inline unsigned int __readahead_batch(struct readahead_control *rac,
				895	struct page **array, unsigned int array_sz)
				896	{
				897	unsigned int i = 0;
				898	XA_STATE(xas, &rac->mapping->i_pages, 0);
				899	struct page *page;
				900
				901	BUG_ON(rac->_batch_count > rac->_nr_pages);
				902	rac->_nr_pages -= rac->_batch_count;
				903	rac->_index += rac->_batch_count;
				904	rac->_batch_count = 0;
				905
				906	xas_set(&xas, rac->_index);
				907	rcu_read_lock();
				908	xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
				909	if (xas_retry(&xas, page))
				910	continue;
				911	VM_BUG_ON_PAGE(!PageLocked(page), page);
				912	VM_BUG_ON_PAGE(PageTail(page), page);
				913	array[i++] = page;
				914	rac->_batch_count += thp_nr_pages(page);
				915
				916	/*
				917	* The page cache isn't using multi-index entries yet,
				918	* so the xas cursor needs to be manually moved to the
				919	* next index. This can be removed once the page cache
				920	* is converted.
				921	*/
				922	if (PageHead(page))
				923	xas_set(&xas, rac->_index + rac->_batch_count);
				924
				925	if (i == array_sz)
				926	break;
				927	}
				928	rcu_read_unlock();
				929
				930	return i;
				931	}
				932
				/**
				 * readahead_page_batch - Get a batch of pages to read.
				 * @rac: The current readahead request.
				 * @array: An array of pointers to struct page.  Its capacity is taken
				 *	with ARRAY_SIZE(), so this must be an actual array.
				 *
				 * Context: The pages are locked and have an elevated refcount.  The caller
				 * should decrease the refcount once the page has been submitted for I/O
				 * and unlock the page once all I/O to that page has completed.
				 * Return: The number of pages placed in the array.  0 indicates the request
				 * is complete.
				 */
				#define readahead_page_batch(rac, array)				\
					__readahead_batch((rac), (array), ARRAY_SIZE(array))
				946
				947	/**
				948	* readahead_pos - The byte offset into the file of this readahead request.
				949	* @rac: The readahead request.
				950	*/
				951	static inline loff_t readahead_pos(struct readahead_control *rac)
				952	{
				953	return (loff_t)rac->_index * PAGE_SIZE;
				954	}
				955
				956	/**
				957	* readahead_length - The number of bytes in this readahead request.
				958	* @rac: The readahead request.
				959	*/
				960	static inline loff_t readahead_length(struct readahead_control *rac)
				961	{
				962	return (loff_t)rac->_nr_pages * PAGE_SIZE;
				963	}
				964
				965	/**
				966	* readahead_index - The index of the first page in this readahead request.
				967	* @rac: The readahead request.
				968	*/
				969	static inline pgoff_t readahead_index(struct readahead_control *rac)
				970	{
				971	return rac->_index;
				972	}
				973
				974	/**
				975	* readahead_count - The number of pages in this readahead request.
				976	* @rac: The readahead request.
				977	*/
				978	static inline unsigned int readahead_count(struct readahead_control *rac)
				979	{
				980	return rac->_nr_pages;
				981	}
				982
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	983	static inline unsigned long dir_pages(struct inode *inode)
				984	{
				985	return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
				986	PAGE_SHIFT;
				987	}
				988
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame]	989	/**
				990	* page_mkwrite_check_truncate - check if page was truncated
				991	* @page: the page to check
				992	* @inode: the inode to check the page against
				993	*
				994	* Returns the number of bytes in the page up to EOF,
				995	* or -EFAULT if the page was truncated.
				996	*/
				997	static inline int page_mkwrite_check_truncate(struct page *page,
				998	struct inode *inode)
				999	{
				1000	loff_t size = i_size_read(inode);
				1001	pgoff_t index = size >> PAGE_SHIFT;
				1002	int offset = offset_in_page(size);
				1003
				1004	if (page->mapping != inode->i_mapping)
				1005	return -EFAULT;
				1006
				1007	/* page is wholly inside EOF */
				1008	if (page->index < index)
				1009	return PAGE_SIZE;
				1010	/* page is wholly past EOF */
				1011	if (page->index > index \|\| !offset)
				1012	return -EFAULT;
				1013	/* page is partially inside EOF */
				1014	return offset;
				1015	}
				1016
				1017	/**
				1018	* i_blocks_per_page - How many blocks fit in this page.
				1019	* @inode: The inode which contains the blocks.
				1020	* @page: The page (head page if the page is a THP).
				1021	*
				1022	* If the block size is larger than the size of this page, return zero.
				1023	*
				1024	* Context: The caller should hold a refcount on the page to prevent it
				1025	* from being split.
				1026	* Return: The number of filesystem blocks covered by this page.
				1027	*/
				1028	static inline
				1029	unsigned int i_blocks_per_page(struct inode inode, struct page page)
				1030	{
				1031	return thp_size(page) >> inode->i_blkbits;
				1032	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1033	#endif /* _LINUX_PAGEMAP_H */