Blame - arch/x86/mm/pageattr.c - hafnium/third_party/linux.git

blob: 281e584cfe39e5509db8343d25f3d96d1078923e [file] [log] [blame]

David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0-only
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2	/*
				3	* Copyright 2002 Andi Kleen, SuSE Labs.
				4	* Thanks to Ben LaHaise for precious feedback.
				5	*/
				6	#include <linux/highmem.h>
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	7	#include <linux/memblock.h>
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	8	#include <linux/sched.h>
				9	#include <linux/mm.h>
				10	#include <linux/interrupt.h>
				11	#include <linux/seq_file.h>
				12	#include <linux/debugfs.h>
				13	#include <linux/pfn.h>
				14	#include <linux/percpu.h>
				15	#include <linux/gfp.h>
				16	#include <linux/pci.h>
				17	#include <linux/vmalloc.h>
				18
				19	#include <asm/e820/api.h>
				20	#include <asm/processor.h>
				21	#include <asm/tlbflush.h>
				22	#include <asm/sections.h>
				23	#include <asm/setup.h>
				24	#include <linux/uaccess.h>
				25	#include <asm/pgalloc.h>
				26	#include <asm/proto.h>
				27	#include <asm/pat.h>
				28	#include <asm/set_memory.h>
				29
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	30	#include "mm_internal.h"
				31
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	32	/*
				33	* The current flushing context - we pass it instead of 5 arguments:
				34	*/
				35	struct cpa_data {
				36	unsigned long *vaddr;
				37	pgd_t *pgd;
				38	pgprot_t mask_set;
				39	pgprot_t mask_clr;
				40	unsigned long numpages;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	41	unsigned long curpage;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	42	unsigned long pfn;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	43	unsigned int flags;
				44	unsigned int force_split : 1,
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	45	force_static_prot : 1,
				46	force_flush_all : 1;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	47	struct page **pages;
				48	};
				49
/* Severity classes used when reporting protection conflicts. */
enum cpa_warn {
	CPA_CONFLICT = 0,
	CPA_PROTECT,
	CPA_DETECT,
};

/* Reports whose level is above this threshold are not printed. */
static const int cpa_warn_level = CPA_PROTECT;
				57
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	58	/*
				59	* Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings)
				60	* using cpa_lock. So that we don't allow any other cpu, with stale large tlb
				61	* entries change the page attribute in parallel to some other cpu
				62	* splitting a large page entry along with changing the attribute.
				63	*/
				64	static DEFINE_SPINLOCK(cpa_lock);
				65
				66	#define CPA_FLUSHTLB 1
				67	#define CPA_ARRAY 2
				68	#define CPA_PAGES_ARRAY 4
				69	#define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */
				70
				71	#ifdef CONFIG_PROC_FS
				72	static unsigned long direct_pages_count[PG_LEVEL_NUM];
				73
				74	void update_page_count(int level, unsigned long pages)
				75	{
				76	/* Protect against CPA */
				77	spin_lock(&pgd_lock);
				78	direct_pages_count[level] += pages;
				79	spin_unlock(&pgd_lock);
				80	}
				81
				82	static void split_page_count(int level)
				83	{
				84	if (direct_pages_count[level] == 0)
				85	return;
				86
				87	direct_pages_count[level]--;
				88	direct_pages_count[level - 1] += PTRS_PER_PTE;
				89	}
				90
				91	void arch_report_meminfo(struct seq_file *m)
				92	{
				93	seq_printf(m, "DirectMap4k: %8lu kB\n",
				94	direct_pages_count[PG_LEVEL_4K] << 2);
				95	#if defined(CONFIG_X86_64) \|\| defined(CONFIG_X86_PAE)
				96	seq_printf(m, "DirectMap2M: %8lu kB\n",
				97	direct_pages_count[PG_LEVEL_2M] << 11);
				98	#else
				99	seq_printf(m, "DirectMap4M: %8lu kB\n",
				100	direct_pages_count[PG_LEVEL_2M] << 12);
				101	#endif
				102	if (direct_gbpages)
				103	seq_printf(m, "DirectMap1G: %8lu kB\n",
				104	direct_pages_count[PG_LEVEL_1G] << 20);
				105	}
				106	#else
				107	static inline void split_page_count(int level) { }
				108	#endif
				109
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	110	#ifdef CONFIG_X86_CPA_STATISTICS
				111
				112	static unsigned long cpa_1g_checked;
				113	static unsigned long cpa_1g_sameprot;
				114	static unsigned long cpa_1g_preserved;
				115	static unsigned long cpa_2m_checked;
				116	static unsigned long cpa_2m_sameprot;
				117	static unsigned long cpa_2m_preserved;
				118	static unsigned long cpa_4k_install;
				119
				120	static inline void cpa_inc_1g_checked(void)
				121	{
				122	cpa_1g_checked++;
				123	}
				124
				125	static inline void cpa_inc_2m_checked(void)
				126	{
				127	cpa_2m_checked++;
				128	}
				129
				130	static inline void cpa_inc_4k_install(void)
				131	{
				132	cpa_4k_install++;
				133	}
				134
				135	static inline void cpa_inc_lp_sameprot(int level)
				136	{
				137	if (level == PG_LEVEL_1G)
				138	cpa_1g_sameprot++;
				139	else
				140	cpa_2m_sameprot++;
				141	}
				142
				143	static inline void cpa_inc_lp_preserved(int level)
				144	{
				145	if (level == PG_LEVEL_1G)
				146	cpa_1g_preserved++;
				147	else
				148	cpa_2m_preserved++;
				149	}
				150
				151	static int cpastats_show(struct seq_file m, void p)
				152	{
				153	seq_printf(m, "1G pages checked: %16lu\n", cpa_1g_checked);
				154	seq_printf(m, "1G pages sameprot: %16lu\n", cpa_1g_sameprot);
				155	seq_printf(m, "1G pages preserved: %16lu\n", cpa_1g_preserved);
				156	seq_printf(m, "2M pages checked: %16lu\n", cpa_2m_checked);
				157	seq_printf(m, "2M pages sameprot: %16lu\n", cpa_2m_sameprot);
				158	seq_printf(m, "2M pages preserved: %16lu\n", cpa_2m_preserved);
				159	seq_printf(m, "4K pages set-checked: %16lu\n", cpa_4k_install);
				160	return 0;
				161	}
				162
				163	static int cpastats_open(struct inode inode, struct file file)
				164	{
				165	return single_open(file, cpastats_show, NULL);
				166	}
				167
				168	static const struct file_operations cpastats_fops = {
				169	.open = cpastats_open,
				170	.read = seq_read,
				171	.llseek = seq_lseek,
				172	.release = single_release,
				173	};
				174
				175	static int __init cpa_stats_init(void)
				176	{
				177	debugfs_create_file("cpa_stats", S_IRUSR, arch_debugfs_dir, NULL,
				178	&cpastats_fops);
				179	return 0;
				180	}
				181	late_initcall(cpa_stats_init);
				182	#else
				183	static inline void cpa_inc_1g_checked(void) { }
				184	static inline void cpa_inc_2m_checked(void) { }
				185	static inline void cpa_inc_4k_install(void) { }
				186	static inline void cpa_inc_lp_sameprot(int level) { }
				187	static inline void cpa_inc_lp_preserved(int level) { }
				188	#endif
				189
				190
/* Non-zero when @addr lies in the half-open range [@start, @end) */
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	return start <= addr && addr < end;
}
				196
				197	static inline int
				198	within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
				199	{
				200	return addr >= start && addr <= end;
				201	}
				202
				203	#ifdef CONFIG_X86_64
				204
				205	static inline unsigned long highmap_start_pfn(void)
				206	{
				207	return __pa_symbol(_text) >> PAGE_SHIFT;
				208	}
				209
				210	static inline unsigned long highmap_end_pfn(void)
				211	{
				212	/* Do not reference physical address outside the kernel. */
				213	return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
				214	}
				215
				216	static bool __cpa_pfn_in_highmap(unsigned long pfn)
				217	{
				218	/*
				219	* Kernel text has an alias mapping at a high address, known
				220	* here as "highmap".
				221	*/
				222	return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn());
				223	}
				224
				225	#else
				226
				227	static bool __cpa_pfn_in_highmap(unsigned long pfn)
				228	{
				229	/* There is no highmap on 32-bit */
				230	return false;
				231	}
				232
				233	#endif
				234
				235	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	236	* See set_mce_nospec().
				237	*
				238	* Machine check recovery code needs to change cache mode of poisoned pages to
				239	* UC to avoid speculative access logging another error. But passing the
				240	* address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a
				241	* speculative access. So we cheat and flip the top bit of the address. This
				242	* works fine for the code that updates the page tables. But at the end of the
				243	* process we need to flush the TLB and cache and the non-canonical address
				244	* causes a #GP fault when used by the INVLPG and CLFLUSH instructions.
				245	*
				246	* But in the common case we already have a canonical address. This code
				247	* will fix the top bit if needed and is a no-op otherwise.
				248	*/
				249	static inline unsigned long fix_addr(unsigned long addr)
				250	{
				251	#ifdef CONFIG_X86_64
				252	return (long)(addr << 1) >> 1;
				253	#else
				254	return addr;
				255	#endif
				256	}
				257
				258	static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
				259	{
				260	if (cpa->flags & CPA_PAGES_ARRAY) {
				261	struct page *page = cpa->pages[idx];
				262
				263	if (unlikely(PageHighMem(page)))
				264	return 0;
				265
				266	return (unsigned long)page_address(page);
				267	}
				268
				269	if (cpa->flags & CPA_ARRAY)
				270	return cpa->vaddr[idx];
				271
				272	return cpa->vaddr + idx PAGE_SIZE;
				273	}
				274
				275	/*
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	276	* Flushing functions
				277	*/
				278
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	279	static void clflush_cache_range_opt(void *vaddr, unsigned int size)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	280	{
				281	const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
				282	void p = (void )((unsigned long)vaddr & ~(clflush_size - 1));
				283	void *vend = vaddr + size;
				284
				285	if (p >= vend)
				286	return;
				287
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	288	for (; p < vend; p += clflush_size)
				289	clflushopt(p);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	290	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	291
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	292	/**
				293	* clflush_cache_range - flush a cache range with clflush
				294	* @vaddr: virtual start address
				295	* @size: number of bytes to flush
				296	*
				297	* CLFLUSHOPT is an unordered instruction which needs fencing with MFENCE or
				298	* SFENCE to avoid ordering issues.
				299	*/
				300	void clflush_cache_range(void *vaddr, unsigned int size)
				301	{
				302	mb();
				303	clflush_cache_range_opt(vaddr, size);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	304	mb();
				305	}
				306	EXPORT_SYMBOL_GPL(clflush_cache_range);
				307
				308	void arch_invalidate_pmem(void *addr, size_t size)
				309	{
				310	clflush_cache_range(addr, size);
				311	}
				312	EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
				313
				314	static void __cpa_flush_all(void *arg)
				315	{
				316	unsigned long cache = (unsigned long)arg;
				317
				318	/*
				319	* Flush all to work around Errata in early athlons regarding
				320	* large page flushing.
				321	*/
				322	__flush_tlb_all();
				323
				324	if (cache && boot_cpu_data.x86 >= 4)
				325	wbinvd();
				326	}
				327
				328	static void cpa_flush_all(unsigned long cache)
				329	{
				330	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
				331
				332	on_each_cpu(__cpa_flush_all, (void *) cache, 1);
				333	}
				334
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	335	void __cpa_flush_tlb(void *data)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	336	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	337	struct cpa_data *cpa = data;
				338	unsigned int i;
				339
				340	for (i = 0; i < cpa->numpages; i++)
				341	__flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	342	}
				343
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	344	static void cpa_flush(struct cpa_data *data, int cache)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	345	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	346	struct cpa_data *cpa = data;
				347	unsigned int i;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	348
				349	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	350
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	351	if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
				352	cpa_flush_all(cache);
				353	return;
				354	}
				355
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	356	if (cpa->force_flush_all \|\| cpa->numpages > tlb_single_page_flush_ceiling)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	357	flush_tlb_all();
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	358	else
				359	on_each_cpu(__cpa_flush_tlb, cpa, 1);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	360
				361	if (!cache)
				362	return;
				363
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	364	mb();
				365	for (i = 0; i < cpa->numpages; i++) {
				366	unsigned long addr = __cpa_addr(cpa, i);
				367	unsigned int level;
				368
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	369	pte_t *pte = lookup_address(addr, &level);
				370
				371	/*
				372	* Only flush present addresses:
				373	*/
				374	if (pte && (pte_val(*pte) & _PAGE_PRESENT))
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	375	clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	376	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	377	mb();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	378	}
				379
/*
 * Tell whether the closed ranges [@r1_start, @r1_end] and
 * [@r2_start, @r2_end] share at least one value.
 *
 * Two closed ranges intersect exactly when each one starts no later than
 * the other one ends; the test is symmetric, so one clause is enough.
 */
static bool overlaps(unsigned long r1_start, unsigned long r1_end,
		     unsigned long r2_start, unsigned long r2_end)
{
	return r1_start <= r2_end && r2_start <= r1_end;
}
				386
				387	#ifdef CONFIG_PCI_BIOS
				388	/*
				389	* The BIOS area between 640k and 1Mb needs to be executable for PCI BIOS
				390	* based config access (CONFIG_PCI_GOBIOS) support.
				391	*/
				392	#define BIOS_PFN PFN_DOWN(BIOS_BEGIN)
				393	#define BIOS_PFN_END PFN_DOWN(BIOS_END - 1)
				394
				395	static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
				396	{
				397	if (pcibios_enabled && overlaps(spfn, epfn, BIOS_PFN, BIOS_PFN_END))
				398	return _PAGE_NX;
				399	return 0;
				400	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	401	#else
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	402	static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
				403	{
				404	return 0;
				405	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	406	#endif
				407
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	408	/*
				409	* The .rodata section needs to be read-only. Using the pfn catches all
				410	* aliases. This also includes __ro_after_init, so do not enforce until
				411	* kernel_set_to_readonly is true.
				412	*/
				413	static pgprotval_t protect_rodata(unsigned long spfn, unsigned long epfn)
				414	{
				415	unsigned long epfn_ro, spfn_ro = PFN_DOWN(__pa_symbol(__start_rodata));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	416
				417	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	418	* Note: __end_rodata is at page aligned and not inclusive, so
				419	* subtract 1 to get the last enforced PFN in the rodata area.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	420	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	421	epfn_ro = PFN_DOWN(__pa_symbol(__end_rodata)) - 1;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	422
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	423	if (kernel_set_to_readonly && overlaps(spfn, epfn, spfn_ro, epfn_ro))
				424	return _PAGE_RW;
				425	return 0;
				426	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	427
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	428	/*
				429	* Protect kernel text against becoming non executable by forbidding
				430	* _PAGE_NX. This protects only the high kernel mapping (_text -> _etext)
				431	* out of which the kernel actually executes. Do not protect the low
				432	* mapping.
				433	*
				434	* This does not cover __inittext since that is gone after boot.
				435	*/
				436	static pgprotval_t protect_kernel_text(unsigned long start, unsigned long end)
				437	{
				438	unsigned long t_end = (unsigned long)_etext - 1;
				439	unsigned long t_start = (unsigned long)_text;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	440
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	441	if (overlaps(start, end, t_start, t_end))
				442	return _PAGE_NX;
				443	return 0;
				444	}
				445
				446	#if defined(CONFIG_X86_64)
				447	/*
				448	* Once the kernel maps the text as RO (kernel_set_to_readonly is set),
				449	* kernel text mappings for the large page aligned text, rodata sections
				450	* will be always read-only. For the kernel identity mappings covering the
				451	* holes caused by this alignment can be anything that user asks.
				452	*
				453	* This will preserve the large page mappings for kernel text/data at no
				454	* extra cost.
				455	*/
				456	static pgprotval_t protect_kernel_text_ro(unsigned long start,
				457	unsigned long end)
				458	{
				459	unsigned long t_end = (unsigned long)__end_rodata_hpage_align - 1;
				460	unsigned long t_start = (unsigned long)_text;
				461	unsigned int level;
				462
				463	if (!kernel_set_to_readonly \|\| !overlaps(start, end, t_start, t_end))
				464	return 0;
				465	/*
				466	* Don't enforce the !RW mapping for the kernel text mapping, if
				467	* the current mapping is already using small page mapping. No
				468	* need to work hard to preserve large page mappings in this case.
				469	*
				470	* This also fixes the Linux Xen paravirt guest boot failure caused
				471	* by unexpected read-only mappings for kernel identity
				472	* mappings. In this paravirt guest case, the kernel text mapping
				473	* and the kernel identity mapping share the same page-table pages,
				474	* so the protections for kernel text and identity mappings have to
				475	* be the same.
				476	*/
				477	if (lookup_address(start, &level) && (level != PG_LEVEL_4K))
				478	return _PAGE_RW;
				479	return 0;
				480	}
				481	#else
				482	static pgprotval_t protect_kernel_text_ro(unsigned long start,
				483	unsigned long end)
				484	{
				485	return 0;
				486	}
				487	#endif
				488
				489	static inline bool conflicts(pgprot_t prot, pgprotval_t val)
				490	{
				491	return (pgprot_val(prot) & ~val) != pgprot_val(prot);
				492	}
				493
				494	static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
				495	unsigned long start, unsigned long end,
				496	unsigned long pfn, const char *txt)
				497	{
				498	static const char *lvltxt[] = {
				499	[CPA_CONFLICT] = "conflict",
				500	[CPA_PROTECT] = "protect",
				501	[CPA_DETECT] = "detect",
				502	};
				503
				504	if (warnlvl > cpa_warn_level \|\| !conflicts(prot, val))
				505	return;
				506
				507	pr_warn("CPA %8s %10s: 0x%016lx - 0x%016lx PFN %lx req %016llx prevent %016llx\n",
				508	lvltxt[warnlvl], txt, start, end, pfn, (unsigned long long)pgprot_val(prot),
				509	(unsigned long long)val);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	510	}
				511
				512	/*
				513	* Certain areas of memory on x86 require very specific protection flags,
				514	* for example the BIOS area or kernel text. Callers don't always get this
				515	* right (again, ioremap() on BIOS memory is not uncommon) so this function
				516	* checks and fixes these known static required protection bits.
				517	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	518	static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
				519	unsigned long pfn, unsigned long npg,
				520	unsigned long lpsize, int warnlvl)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	521	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	522	pgprotval_t forbidden, res;
				523	unsigned long end;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	524
				525	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	526	* There is no point in checking RW/NX conflicts when the requested
				527	* mapping is setting the page !PRESENT.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	528	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	529	if (!(pgprot_val(prot) & _PAGE_PRESENT))
				530	return prot;
				531
				532	/* Operate on the virtual address */
				533	end = start + npg * PAGE_SIZE - 1;
				534
				535	res = protect_kernel_text(start, end);
				536	check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX");
				537	forbidden = res;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	538
				539	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	540	* Special case to preserve a large page. If the change spawns the
				541	* full large page mapping then there is no point to split it
				542	* up. Happens with ftrace and is going to be removed once ftrace
				543	* switched to text_poke().
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	544	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	545	if (lpsize != (npg * PAGE_SIZE) \|\| (start & (lpsize - 1))) {
				546	res = protect_kernel_text_ro(start, end);
				547	check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
				548	forbidden \|= res;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	549	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	550
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	551	/* Check the PFN directly */
				552	res = protect_pci_bios(pfn, pfn + npg - 1);
				553	check_conflict(warnlvl, prot, res, start, end, pfn, "PCIBIOS NX");
				554	forbidden \|= res;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	555
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	556	res = protect_rodata(pfn, pfn + npg - 1);
				557	check_conflict(warnlvl, prot, res, start, end, pfn, "Rodata RO");
				558	forbidden \|= res;
				559
				560	return __pgprot(pgprot_val(prot) & ~forbidden);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	561	}
				562
				563	/*
				564	* Lookup the page table entry for a virtual address in a specific pgd.
				565	* Return a pointer to the entry and the level of the mapping.
				566	*/
				567	pte_t lookup_address_in_pgd(pgd_t pgd, unsigned long address,
				568	unsigned int *level)
				569	{
				570	p4d_t *p4d;
				571	pud_t *pud;
				572	pmd_t *pmd;
				573
				574	*level = PG_LEVEL_NONE;
				575
				576	if (pgd_none(*pgd))
				577	return NULL;
				578
				579	p4d = p4d_offset(pgd, address);
				580	if (p4d_none(*p4d))
				581	return NULL;
				582
				583	*level = PG_LEVEL_512G;
				584	if (p4d_large(p4d) \|\| !p4d_present(p4d))
				585	return (pte_t *)p4d;
				586
				587	pud = pud_offset(p4d, address);
				588	if (pud_none(*pud))
				589	return NULL;
				590
				591	*level = PG_LEVEL_1G;
				592	if (pud_large(pud) \|\| !pud_present(pud))
				593	return (pte_t *)pud;
				594
				595	pmd = pmd_offset(pud, address);
				596	if (pmd_none(*pmd))
				597	return NULL;
				598
				599	*level = PG_LEVEL_2M;
				600	if (pmd_large(pmd) \|\| !pmd_present(pmd))
				601	return (pte_t *)pmd;
				602
				603	*level = PG_LEVEL_4K;
				604
				605	return pte_offset_kernel(pmd, address);
				606	}
				607
				608	/*
				609	* Lookup the page table entry for a virtual address. Return a pointer
				610	* to the entry and the level of the mapping.
				611	*
				612	* Note: We return pud and pmd either when the entry is marked large
				613	* or when the present bit is not set. Otherwise we would return a
				614	* pointer to a nonexisting mapping.
				615	*/
				616	pte_t lookup_address(unsigned long address, unsigned int level)
				617	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	618	return lookup_address_in_pgd(pgd_offset_k(address), address, level);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	619	}
				620	EXPORT_SYMBOL_GPL(lookup_address);
				621
				622	static pte_t _lookup_address_cpa(struct cpa_data cpa, unsigned long address,
				623	unsigned int *level)
				624	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	625	if (cpa->pgd)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	626	return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
				627	address, level);
				628
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	629	return lookup_address(address, level);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	630	}
				631
				632	/*
				633	* Lookup the PMD entry for a virtual address. Return a pointer to the entry
				634	* or NULL if not present.
				635	*/
				636	pmd_t *lookup_pmd_address(unsigned long address)
				637	{
				638	pgd_t *pgd;
				639	p4d_t *p4d;
				640	pud_t *pud;
				641
				642	pgd = pgd_offset_k(address);
				643	if (pgd_none(*pgd))
				644	return NULL;
				645
				646	p4d = p4d_offset(pgd, address);
				647	if (p4d_none(p4d) \|\| p4d_large(p4d) \|\| !p4d_present(*p4d))
				648	return NULL;
				649
				650	pud = pud_offset(p4d, address);
				651	if (pud_none(pud) \|\| pud_large(pud) \|\| !pud_present(*pud))
				652	return NULL;
				653
				654	return pmd_offset(pud, address);
				655	}
				656
				657	/*
				658	* This is necessary because __pa() does not work on some
				659	* kinds of memory, like vmalloc() or the alloc_remap()
				660	* areas on 32-bit NUMA systems. The percpu areas can
				661	* end up in this kind of memory, for instance.
				662	*
				663	* This could be optimized, but it is only intended to be
				664	* used at initialization time, and keeping it
				665	* unoptimized should increase the testing coverage for
				666	* the more obscure platforms.
				667	*/
				668	phys_addr_t slow_virt_to_phys(void *__virt_addr)
				669	{
				670	unsigned long virt_addr = (unsigned long)__virt_addr;
				671	phys_addr_t phys_addr;
				672	unsigned long offset;
				673	enum pg_level level;
				674	pte_t *pte;
				675
				676	pte = lookup_address(virt_addr, &level);
				677	BUG_ON(!pte);
				678
				679	/*
				680	* pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
				681	* before being left-shifted PAGE_SHIFT bits -- this trick is to
				682	* make 32-PAE kernel work correctly.
				683	*/
				684	switch (level) {
				685	case PG_LEVEL_1G:
				686	phys_addr = (phys_addr_t)pud_pfn((pud_t )pte) << PAGE_SHIFT;
				687	offset = virt_addr & ~PUD_PAGE_MASK;
				688	break;
				689	case PG_LEVEL_2M:
				690	phys_addr = (phys_addr_t)pmd_pfn((pmd_t )pte) << PAGE_SHIFT;
				691	offset = virt_addr & ~PMD_PAGE_MASK;
				692	break;
				693	default:
				694	phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
				695	offset = virt_addr & ~PAGE_MASK;
				696	}
				697
				698	return (phys_addr_t)(phys_addr \| offset);
				699	}
				700	EXPORT_SYMBOL_GPL(slow_virt_to_phys);
				701
				702	/*
				703	* Set the new pmd in all the pgds we know about:
				704	*/
				705	static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
				706	{
				707	/* change init_mm */
				708	set_pte_atomic(kpte, pte);
				709	#ifdef CONFIG_X86_32
				710	if (!SHARED_KERNEL_PMD) {
				711	struct page *page;
				712
				713	list_for_each_entry(page, &pgd_list, lru) {
				714	pgd_t *pgd;
				715	p4d_t *p4d;
				716	pud_t *pud;
				717	pmd_t *pmd;
				718
				719	pgd = (pgd_t *)page_address(page) + pgd_index(address);
				720	p4d = p4d_offset(pgd, address);
				721	pud = pud_offset(p4d, address);
				722	pmd = pmd_offset(pud, address);
				723	set_pte_atomic((pte_t *)pmd, pte);
				724	}
				725	}
				726	#endif
				727	}
				728
				729	static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
				730	{
				731	/*
				732	* _PAGE_GLOBAL means "global page" for present PTEs.
				733	* But, it is also used to indicate _PAGE_PROTNONE
				734	* for non-present PTEs.
				735	*
				736	* This ensures that a _PAGE_GLOBAL PTE going from
				737	* present to non-present is not confused as
				738	* _PAGE_PROTNONE.
				739	*/
				740	if (!(pgprot_val(prot) & _PAGE_PRESENT))
				741	pgprot_val(prot) &= ~_PAGE_GLOBAL;
				742
				743	return prot;
				744	}
				745
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	746	static int __should_split_large_page(pte_t *kpte, unsigned long address,
				747	struct cpa_data *cpa)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	748	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	749	unsigned long numpages, pmask, psize, lpaddr, pfn, old_pfn;
				750	pgprot_t old_prot, new_prot, req_prot, chk_prot;
				751	pte_t new_pte, *tmp;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	752	enum pg_level level;
				753
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	754	/*
				755	* Check for races, another CPU might have split this page
				756	* up already:
				757	*/
				758	tmp = _lookup_address_cpa(cpa, address, &level);
				759	if (tmp != kpte)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	760	return 1;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	761
				762	switch (level) {
				763	case PG_LEVEL_2M:
				764	old_prot = pmd_pgprot((pmd_t )kpte);
				765	old_pfn = pmd_pfn((pmd_t )kpte);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	766	cpa_inc_2m_checked();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	767	break;
				768	case PG_LEVEL_1G:
				769	old_prot = pud_pgprot((pud_t )kpte);
				770	old_pfn = pud_pfn((pud_t )kpte);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	771	cpa_inc_1g_checked();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	772	break;
				773	default:
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	774	return -EINVAL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	775	}
				776
				777	psize = page_level_size(level);
				778	pmask = page_level_mask(level);
				779
				780	/*
				781	* Calculate the number of pages, which fit into this large
				782	* page starting at address:
				783	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	784	lpaddr = (address + psize) & pmask;
				785	numpages = (lpaddr - address) >> PAGE_SHIFT;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	786	if (numpages < cpa->numpages)
				787	cpa->numpages = numpages;
				788
				789	/*
				790	* We are safe now. Check whether the new pgprot is the same:
				791	* Convert protection attributes to 4k-format, as cpa->mask* are set
				792	* up accordingly.
				793	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	794
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	795	/* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */
				796	req_prot = pgprot_large_2_4k(old_prot);
				797
				798	pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
				799	pgprot_val(req_prot) \|= pgprot_val(cpa->mask_set);
				800
				801	/*
				802	* req_prot is in format of 4k pages. It must be converted to large
				803	* page format: the caching mode includes the PAT bit located at
				804	* different bit positions in the two formats.
				805	*/
				806	req_prot = pgprot_4k_2_large(req_prot);
				807	req_prot = pgprot_clear_protnone_bits(req_prot);
				808	if (pgprot_val(req_prot) & _PAGE_PRESENT)
				809	pgprot_val(req_prot) \|= _PAGE_PSE;
				810
				811	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	812	* old_pfn points to the large page base pfn. So we need to add the
				813	* offset of the virtual address:
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	814	*/
				815	pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);
				816	cpa->pfn = pfn;
				817
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	818	/*
				819	* Calculate the large page base address and the number of 4K pages
				820	* in the large page
				821	*/
				822	lpaddr = address & pmask;
				823	numpages = psize >> PAGE_SHIFT;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	824
				825	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	826	* Sanity check that the existing mapping is correct versus the static
				827	* protections. static_protections() guards against !PRESENT, so no
				828	* extra conditional required here.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	829	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	830	chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages,
				831	psize, CPA_CONFLICT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	832
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	833	if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	834	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	835	* Split the large page and tell the split code to
				836	* enforce static protections.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	837	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	838	cpa->force_static_prot = 1;
				839	return 1;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	840	}
				841
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	842	/*
				843	* Optimization: If the requested pgprot is the same as the current
				844	* pgprot, then the large page can be preserved and no updates are
				845	* required independent of alignment and length of the requested
				846	* range. The above already established that the current pgprot is
				847	* correct, which in consequence makes the requested pgprot correct
				848	* as well if it is the same. The static protection scan below will
				849	* not come to a different conclusion.
				850	*/
				851	if (pgprot_val(req_prot) == pgprot_val(old_prot)) {
				852	cpa_inc_lp_sameprot(level);
				853	return 0;
				854	}
				855
				856	/*
				857	* If the requested range does not cover the full page, split it up
				858	*/
				859	if (address != lpaddr \|\| cpa->numpages != numpages)
				860	return 1;
				861
				862	/*
				863	* Check whether the requested pgprot is conflicting with a static
				864	* protection requirement in the large page.
				865	*/
				866	new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
				867	psize, CPA_DETECT);
				868
				869	/*
				870	* If there is a conflict, split the large page.
				871	*
				872	* There used to be a 4k wise evaluation trying really hard to
				873	* preserve the large pages, but experimentation has shown, that this
				874	* does not help at all. There might be corner cases which would
				875	* preserve one large page occasionally, but it's really not worth the
				876	* extra code and cycles for the common case.
				877	*/
				878	if (pgprot_val(req_prot) != pgprot_val(new_prot))
				879	return 1;
				880
				881	/* All checks passed. Update the large page mapping. */
				882	new_pte = pfn_pte(old_pfn, new_prot);
				883	__set_pmd_pte(kpte, address, new_pte);
				884	cpa->flags \|= CPA_FLUSHTLB;
				885	cpa_inc_lp_preserved(level);
				886	return 0;
				887	}
				888
				889	static int should_split_large_page(pte_t *kpte, unsigned long address,
				890	struct cpa_data *cpa)
				891	{
				892	int do_split;
				893
				894	if (cpa->force_split)
				895	return 1;
				896
				897	spin_lock(&pgd_lock);
				898	do_split = __should_split_large_page(kpte, address, cpa);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	899	spin_unlock(&pgd_lock);
				900
				901	return do_split;
				902	}
				903
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	904	static void split_set_pte(struct cpa_data cpa, pte_t pte, unsigned long pfn,
				905	pgprot_t ref_prot, unsigned long address,
				906	unsigned long size)
				907	{
				908	unsigned int npg = PFN_DOWN(size);
				909	pgprot_t prot;
				910
				911	/*
				912	* If should_split_large_page() discovered an inconsistent mapping,
				913	* remove the invalid protection in the split mapping.
				914	*/
				915	if (!cpa->force_static_prot)
				916	goto set;
				917
				918	/* Hand in lpsize = 0 to enforce the protection mechanism */
				919	prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT);
				920
				921	if (pgprot_val(prot) == pgprot_val(ref_prot))
				922	goto set;
				923
				924	/*
				925	* If this is splitting a PMD, fix it up. PUD splits cannot be
				926	* fixed trivially as that would require to rescan the newly
				927	* installed PMD mappings after returning from split_large_page()
				928	* so an eventual further split can allocate the necessary PTE
				929	* pages. Warn for now and revisit it in case this actually
				930	* happens.
				931	*/
				932	if (size == PAGE_SIZE)
				933	ref_prot = prot;
				934	else
				935	pr_warn_once("CPA: Cannot fixup static protections for PUD split\n");
				936	set:
				937	set_pte(pte, pfn_pte(pfn, ref_prot));
				938	}
				939
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	940	static int
				941	__split_large_page(struct cpa_data cpa, pte_t kpte, unsigned long address,
				942	struct page *base)
				943	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	944	unsigned long lpaddr, lpinc, ref_pfn, pfn, pfninc = 1;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	945	pte_t pbase = (pte_t )page_address(base);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	946	unsigned int i, level;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	947	pgprot_t ref_prot;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	948	pte_t *tmp;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	949
				950	spin_lock(&pgd_lock);
				951	/*
				952	* Check for races, another CPU might have split this page
				953	* up for us already:
				954	*/
				955	tmp = _lookup_address_cpa(cpa, address, &level);
				956	if (tmp != kpte) {
				957	spin_unlock(&pgd_lock);
				958	return 1;
				959	}
				960
				961	paravirt_alloc_pte(&init_mm, page_to_pfn(base));
				962
				963	switch (level) {
				964	case PG_LEVEL_2M:
				965	ref_prot = pmd_pgprot((pmd_t )kpte);
				966	/*
				967	* Clear PSE (aka _PAGE_PAT) and move
				968	* PAT bit to correct position.
				969	*/
				970	ref_prot = pgprot_large_2_4k(ref_prot);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	971	ref_pfn = pmd_pfn((pmd_t )kpte);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	972	lpaddr = address & PMD_MASK;
				973	lpinc = PAGE_SIZE;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	974	break;
				975
				976	case PG_LEVEL_1G:
				977	ref_prot = pud_pgprot((pud_t )kpte);
				978	ref_pfn = pud_pfn((pud_t )kpte);
				979	pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	980	lpaddr = address & PUD_MASK;
				981	lpinc = PMD_SIZE;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	982	/*
				983	* Clear the PSE flags if the PRESENT flag is not set
				984	* otherwise pmd_present/pmd_huge will return true
				985	* even on a non present pmd.
				986	*/
				987	if (!(pgprot_val(ref_prot) & _PAGE_PRESENT))
				988	pgprot_val(ref_prot) &= ~_PAGE_PSE;
				989	break;
				990
				991	default:
				992	spin_unlock(&pgd_lock);
				993	return 1;
				994	}
				995
				996	ref_prot = pgprot_clear_protnone_bits(ref_prot);
				997
				998	/*
				999	* Get the target pfn from the original entry:
				1000	*/
				1001	pfn = ref_pfn;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1002	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc, lpaddr += lpinc)
				1003	split_set_pte(cpa, pbase + i, pfn, ref_prot, lpaddr, lpinc);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1004
				1005	if (virt_addr_valid(address)) {
				1006	unsigned long pfn = PFN_DOWN(__pa(address));
				1007
				1008	if (pfn_range_is_mapped(pfn, pfn + 1))
				1009	split_page_count(level);
				1010	}
				1011
				1012	/*
				1013	* Install the new, split up pagetable.
				1014	*
				1015	* We use the standard kernel pagetable protections for the new
				1016	* pagetable protections, the actual ptes set above control the
				1017	* primary protection behavior:
				1018	*/
				1019	__set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
				1020
				1021	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1022	* Do a global flush tlb after splitting the large page
				1023	* and before we do the actual change page attribute in the PTE.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1024	*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1025	* Without this, we violate the TLB application note, that says:
				1026	* "The TLBs may contain both ordinary and large-page
				1027	* translations for a 4-KByte range of linear addresses. This
				1028	* may occur if software modifies the paging structures so that
				1029	* the page size used for the address range changes. If the two
				1030	* translations differ with respect to page frame or attributes
				1031	* (e.g., permissions), processor behavior is undefined and may
				1032	* be implementation-specific."
				1033	*
				1034	* We do this global tlb flush inside the cpa_lock, so that we
				1035	* don't allow any other cpu, with stale tlb entries change the
				1036	* page attribute in parallel, that also falls into the
				1037	* just split large page entry.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1038	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1039	flush_tlb_all();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1040	spin_unlock(&pgd_lock);
				1041
				1042	return 0;
				1043	}
				1044
				1045	static int split_large_page(struct cpa_data cpa, pte_t kpte,
				1046	unsigned long address)
				1047	{
				1048	struct page *base;
				1049
				1050	if (!debug_pagealloc_enabled())
				1051	spin_unlock(&cpa_lock);
				1052	base = alloc_pages(GFP_KERNEL, 0);
				1053	if (!debug_pagealloc_enabled())
				1054	spin_lock(&cpa_lock);
				1055	if (!base)
				1056	return -ENOMEM;
				1057
				1058	if (__split_large_page(cpa, kpte, address, base))
				1059	__free_page(base);
				1060
				1061	return 0;
				1062	}
				1063
				1064	static bool try_to_free_pte_page(pte_t *pte)
				1065	{
				1066	int i;
				1067
				1068	for (i = 0; i < PTRS_PER_PTE; i++)
				1069	if (!pte_none(pte[i]))
				1070	return false;
				1071
				1072	free_page((unsigned long)pte);
				1073	return true;
				1074	}
				1075
				1076	static bool try_to_free_pmd_page(pmd_t *pmd)
				1077	{
				1078	int i;
				1079
				1080	for (i = 0; i < PTRS_PER_PMD; i++)
				1081	if (!pmd_none(pmd[i]))
				1082	return false;
				1083
				1084	free_page((unsigned long)pmd);
				1085	return true;
				1086	}
				1087
				1088	static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
				1089	{
				1090	pte_t *pte = pte_offset_kernel(pmd, start);
				1091
				1092	while (start < end) {
				1093	set_pte(pte, __pte(0));
				1094
				1095	start += PAGE_SIZE;
				1096	pte++;
				1097	}
				1098
				1099	if (try_to_free_pte_page((pte_t )pmd_page_vaddr(pmd))) {
				1100	pmd_clear(pmd);
				1101	return true;
				1102	}
				1103	return false;
				1104	}
				1105
				1106	static void __unmap_pmd_range(pud_t pud, pmd_t pmd,
				1107	unsigned long start, unsigned long end)
				1108	{
				1109	if (unmap_pte_range(pmd, start, end))
				1110	if (try_to_free_pmd_page((pmd_t )pud_page_vaddr(pud)))
				1111	pud_clear(pud);
				1112	}
				1113
				1114	static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
				1115	{
				1116	pmd_t *pmd = pmd_offset(pud, start);
				1117
				1118	/*
				1119	* Not on a 2MB page boundary?
				1120	*/
				1121	if (start & (PMD_SIZE - 1)) {
				1122	unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
				1123	unsigned long pre_end = min_t(unsigned long, end, next_page);
				1124
				1125	__unmap_pmd_range(pud, pmd, start, pre_end);
				1126
				1127	start = pre_end;
				1128	pmd++;
				1129	}
				1130
				1131	/*
				1132	* Try to unmap in 2M chunks.
				1133	*/
				1134	while (end - start >= PMD_SIZE) {
				1135	if (pmd_large(*pmd))
				1136	pmd_clear(pmd);
				1137	else
				1138	__unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
				1139
				1140	start += PMD_SIZE;
				1141	pmd++;
				1142	}
				1143
				1144	/*
				1145	* 4K leftovers?
				1146	*/
				1147	if (start < end)
				1148	return __unmap_pmd_range(pud, pmd, start, end);
				1149
				1150	/*
				1151	* Try again to free the PMD page if haven't succeeded above.
				1152	*/
				1153	if (!pud_none(*pud))
				1154	if (try_to_free_pmd_page((pmd_t )pud_page_vaddr(pud)))
				1155	pud_clear(pud);
				1156	}
				1157
				1158	static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
				1159	{
				1160	pud_t *pud = pud_offset(p4d, start);
				1161
				1162	/*
				1163	* Not on a GB page boundary?
				1164	*/
				1165	if (start & (PUD_SIZE - 1)) {
				1166	unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
				1167	unsigned long pre_end = min_t(unsigned long, end, next_page);
				1168
				1169	unmap_pmd_range(pud, start, pre_end);
				1170
				1171	start = pre_end;
				1172	pud++;
				1173	}
				1174
				1175	/*
				1176	* Try to unmap in 1G chunks?
				1177	*/
				1178	while (end - start >= PUD_SIZE) {
				1179
				1180	if (pud_large(*pud))
				1181	pud_clear(pud);
				1182	else
				1183	unmap_pmd_range(pud, start, start + PUD_SIZE);
				1184
				1185	start += PUD_SIZE;
				1186	pud++;
				1187	}
				1188
				1189	/*
				1190	* 2M leftovers?
				1191	*/
				1192	if (start < end)
				1193	unmap_pmd_range(pud, start, end);
				1194
				1195	/*
				1196	* No need to try to free the PUD page because we'll free it in
				1197	* populate_pgd's error path
				1198	*/
				1199	}
				1200
				1201	static int alloc_pte_page(pmd_t *pmd)
				1202	{
				1203	pte_t pte = (pte_t )get_zeroed_page(GFP_KERNEL);
				1204	if (!pte)
				1205	return -1;
				1206
				1207	set_pmd(pmd, __pmd(__pa(pte) \| _KERNPG_TABLE));
				1208	return 0;
				1209	}
				1210
				1211	static int alloc_pmd_page(pud_t *pud)
				1212	{
				1213	pmd_t pmd = (pmd_t )get_zeroed_page(GFP_KERNEL);
				1214	if (!pmd)
				1215	return -1;
				1216
				1217	set_pud(pud, __pud(__pa(pmd) \| _KERNPG_TABLE));
				1218	return 0;
				1219	}
				1220
				1221	static void populate_pte(struct cpa_data *cpa,
				1222	unsigned long start, unsigned long end,
				1223	unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
				1224	{
				1225	pte_t *pte;
				1226
				1227	pte = pte_offset_kernel(pmd, start);
				1228
				1229	pgprot = pgprot_clear_protnone_bits(pgprot);
				1230
				1231	while (num_pages-- && start < end) {
				1232	set_pte(pte, pfn_pte(cpa->pfn, pgprot));
				1233
				1234	start += PAGE_SIZE;
				1235	cpa->pfn++;
				1236	pte++;
				1237	}
				1238	}
				1239
				1240	static long populate_pmd(struct cpa_data *cpa,
				1241	unsigned long start, unsigned long end,
				1242	unsigned num_pages, pud_t *pud, pgprot_t pgprot)
				1243	{
				1244	long cur_pages = 0;
				1245	pmd_t *pmd;
				1246	pgprot_t pmd_pgprot;
				1247
				1248	/*
				1249	* Not on a 2M boundary?
				1250	*/
				1251	if (start & (PMD_SIZE - 1)) {
				1252	unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
				1253	unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
				1254
				1255	pre_end = min_t(unsigned long, pre_end, next_page);
				1256	cur_pages = (pre_end - start) >> PAGE_SHIFT;
				1257	cur_pages = min_t(unsigned int, num_pages, cur_pages);
				1258
				1259	/*
				1260	* Need a PTE page?
				1261	*/
				1262	pmd = pmd_offset(pud, start);
				1263	if (pmd_none(*pmd))
				1264	if (alloc_pte_page(pmd))
				1265	return -1;
				1266
				1267	populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
				1268
				1269	start = pre_end;
				1270	}
				1271
				1272	/*
				1273	* We mapped them all?
				1274	*/
				1275	if (num_pages == cur_pages)
				1276	return cur_pages;
				1277
				1278	pmd_pgprot = pgprot_4k_2_large(pgprot);
				1279
				1280	while (end - start >= PMD_SIZE) {
				1281
				1282	/*
				1283	* We cannot use a 1G page so allocate a PMD page if needed.
				1284	*/
				1285	if (pud_none(*pud))
				1286	if (alloc_pmd_page(pud))
				1287	return -1;
				1288
				1289	pmd = pmd_offset(pud, start);
				1290
				1291	set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn,
				1292	canon_pgprot(pmd_pgprot))));
				1293
				1294	start += PMD_SIZE;
				1295	cpa->pfn += PMD_SIZE >> PAGE_SHIFT;
				1296	cur_pages += PMD_SIZE >> PAGE_SHIFT;
				1297	}
				1298
				1299	/*
				1300	* Map trailing 4K pages.
				1301	*/
				1302	if (start < end) {
				1303	pmd = pmd_offset(pud, start);
				1304	if (pmd_none(*pmd))
				1305	if (alloc_pte_page(pmd))
				1306	return -1;
				1307
				1308	populate_pte(cpa, start, end, num_pages - cur_pages,
				1309	pmd, pgprot);
				1310	}
				1311	return num_pages;
				1312	}
				1313
				1314	static int populate_pud(struct cpa_data cpa, unsigned long start, p4d_t p4d,
				1315	pgprot_t pgprot)
				1316	{
				1317	pud_t *pud;
				1318	unsigned long end;
				1319	long cur_pages = 0;
				1320	pgprot_t pud_pgprot;
				1321
				1322	end = start + (cpa->numpages << PAGE_SHIFT);
				1323
				1324	/*
				1325	* Not on a Gb page boundary? => map everything up to it with
				1326	* smaller pages.
				1327	*/
				1328	if (start & (PUD_SIZE - 1)) {
				1329	unsigned long pre_end;
				1330	unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
				1331
				1332	pre_end = min_t(unsigned long, end, next_page);
				1333	cur_pages = (pre_end - start) >> PAGE_SHIFT;
				1334	cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
				1335
				1336	pud = pud_offset(p4d, start);
				1337
				1338	/*
				1339	* Need a PMD page?
				1340	*/
				1341	if (pud_none(*pud))
				1342	if (alloc_pmd_page(pud))
				1343	return -1;
				1344
				1345	cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
				1346	pud, pgprot);
				1347	if (cur_pages < 0)
				1348	return cur_pages;
				1349
				1350	start = pre_end;
				1351	}
				1352
				1353	/* We mapped them all? */
				1354	if (cpa->numpages == cur_pages)
				1355	return cur_pages;
				1356
				1357	pud = pud_offset(p4d, start);
				1358	pud_pgprot = pgprot_4k_2_large(pgprot);
				1359
				1360	/*
				1361	* Map everything starting from the Gb boundary, possibly with 1G pages
				1362	*/
				1363	while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) {
				1364	set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn,
				1365	canon_pgprot(pud_pgprot))));
				1366
				1367	start += PUD_SIZE;
				1368	cpa->pfn += PUD_SIZE >> PAGE_SHIFT;
				1369	cur_pages += PUD_SIZE >> PAGE_SHIFT;
				1370	pud++;
				1371	}
				1372
				1373	/* Map trailing leftover */
				1374	if (start < end) {
				1375	long tmp;
				1376
				1377	pud = pud_offset(p4d, start);
				1378	if (pud_none(*pud))
				1379	if (alloc_pmd_page(pud))
				1380	return -1;
				1381
				1382	tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
				1383	pud, pgprot);
				1384	if (tmp < 0)
				1385	return cur_pages;
				1386
				1387	cur_pages += tmp;
				1388	}
				1389	return cur_pages;
				1390	}
				1391
				1392	/*
				1393	* Restrictions for kernel page table do not necessarily apply when mapping in
				1394	* an alternate PGD.
				1395	*/
				1396	static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
				1397	{
				1398	pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
				1399	pud_t pud = NULL; / shut up gcc */
				1400	p4d_t *p4d;
				1401	pgd_t *pgd_entry;
				1402	long ret;
				1403
				1404	pgd_entry = cpa->pgd + pgd_index(addr);
				1405
				1406	if (pgd_none(*pgd_entry)) {
				1407	p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
				1408	if (!p4d)
				1409	return -1;
				1410
				1411	set_pgd(pgd_entry, __pgd(__pa(p4d) \| _KERNPG_TABLE));
				1412	}
				1413
				1414	/*
				1415	* Allocate a PUD page and hand it down for mapping.
				1416	*/
				1417	p4d = p4d_offset(pgd_entry, addr);
				1418	if (p4d_none(*p4d)) {
				1419	pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
				1420	if (!pud)
				1421	return -1;
				1422
				1423	set_p4d(p4d, __p4d(__pa(pud) \| _KERNPG_TABLE));
				1424	}
				1425
				1426	pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
				1427	pgprot_val(pgprot) \|= pgprot_val(cpa->mask_set);
				1428
				1429	ret = populate_pud(cpa, addr, p4d, pgprot);
				1430	if (ret < 0) {
				1431	/*
				1432	* Leave the PUD page in place in case some other CPU or thread
				1433	* already found it, but remove any useless entries we just
				1434	* added to it.
				1435	*/
				1436	unmap_pud_range(p4d, addr,
				1437	addr + (cpa->numpages << PAGE_SHIFT));
				1438	return ret;
				1439	}
				1440
				1441	cpa->numpages = ret;
				1442	return 0;
				1443	}
				1444
				1445	static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
				1446	int primary)
				1447	{
				1448	if (cpa->pgd) {
				1449	/*
				1450	* Right now, we only execute this code path when mapping
				1451	* the EFI virtual memory map regions, no other users
				1452	* provide a ->pgd value. This may change in the future.
				1453	*/
				1454	return populate_pgd(cpa, vaddr);
				1455	}
				1456
				1457	/*
				1458	* Ignore all non primary paths.
				1459	*/
				1460	if (!primary) {
				1461	cpa->numpages = 1;
				1462	return 0;
				1463	}
				1464
				1465	/*
				1466	* Ignore the NULL PTE for kernel identity mapping, as it is expected
				1467	* to have holes.
				1468	* Also set numpages to '1' indicating that we processed cpa req for
				1469	* one virtual address page and its pfn. TBD: numpages can be set based
				1470	* on the initial value and the level returned by lookup_address().
				1471	*/
				1472	if (within(vaddr, PAGE_OFFSET,
				1473	PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
				1474	cpa->numpages = 1;
				1475	cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
				1476	return 0;
				1477
				1478	} else if (__cpa_pfn_in_highmap(cpa->pfn)) {
				1479	/* Faults in the highmap are OK, so do not warn: */
				1480	return -EFAULT;
				1481	} else {
				1482	WARN(1, KERN_WARNING "CPA: called for zero pte. "
				1483	"vaddr = %lx cpa->vaddr = %lx\n", vaddr,
				1484	*cpa->vaddr);
				1485
				1486	return -EFAULT;
				1487	}
				1488	}
				1489
				1490	static int __change_page_attr(struct cpa_data *cpa, int primary)
				1491	{
				1492	unsigned long address;
				1493	int do_split, err;
				1494	unsigned int level;
				1495	pte_t *kpte, old_pte;
				1496
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1497	address = __cpa_addr(cpa, cpa->curpage);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1498	repeat:
				1499	kpte = _lookup_address_cpa(cpa, address, &level);
				1500	if (!kpte)
				1501	return __cpa_process_fault(cpa, address, primary);
				1502
				1503	old_pte = *kpte;
				1504	if (pte_none(old_pte))
				1505	return __cpa_process_fault(cpa, address, primary);
				1506
				1507	if (level == PG_LEVEL_4K) {
				1508	pte_t new_pte;
				1509	pgprot_t new_prot = pte_pgprot(old_pte);
				1510	unsigned long pfn = pte_pfn(old_pte);
				1511
				1512	pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
				1513	pgprot_val(new_prot) \|= pgprot_val(cpa->mask_set);
				1514
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1515	cpa_inc_4k_install();
				1516	/* Hand in lpsize = 0 to enforce the protection mechanism */
				1517	new_prot = static_protections(new_prot, address, pfn, 1, 0,
				1518	CPA_PROTECT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1519
				1520	new_prot = pgprot_clear_protnone_bits(new_prot);
				1521
				1522	/*
				1523	* We need to keep the pfn from the existing PTE,
				1524	* after all we're only going to change it's attributes
				1525	* not the memory it points to
				1526	*/
				1527	new_pte = pfn_pte(pfn, new_prot);
				1528	cpa->pfn = pfn;
				1529	/*
				1530	* Do we really change anything ?
				1531	*/
				1532	if (pte_val(old_pte) != pte_val(new_pte)) {
				1533	set_pte_atomic(kpte, new_pte);
				1534	cpa->flags \|= CPA_FLUSHTLB;
				1535	}
				1536	cpa->numpages = 1;
				1537	return 0;
				1538	}
				1539
				1540	/*
				1541	* Check, whether we can keep the large page intact
				1542	* and just change the pte:
				1543	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1544	do_split = should_split_large_page(kpte, address, cpa);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1545	/*
				1546	* When the range fits into the existing large page,
				1547	* return. cp->numpages and cpa->tlbflush have been updated in
				1548	* try_large_page:
				1549	*/
				1550	if (do_split <= 0)
				1551	return do_split;
				1552
				1553	/*
				1554	* We have to split the large page:
				1555	*/
				1556	err = split_large_page(cpa, kpte, address);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1557	if (!err)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1558	goto repeat;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1559
				1560	return err;
				1561	}
				1562
				1563	static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
				1564
				1565	static int cpa_process_alias(struct cpa_data *cpa)
				1566	{
				1567	struct cpa_data alias_cpa;
				1568	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
				1569	unsigned long vaddr;
				1570	int ret;
				1571
				1572	if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
				1573	return 0;
				1574
				1575	/*
				1576	* No need to redo, when the primary call touched the direct
				1577	* mapping already:
				1578	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1579	vaddr = __cpa_addr(cpa, cpa->curpage);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1580	if (!(within(vaddr, PAGE_OFFSET,
				1581	PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
				1582
				1583	alias_cpa = *cpa;
				1584	alias_cpa.vaddr = &laddr;
				1585	alias_cpa.flags &= ~(CPA_PAGES_ARRAY \| CPA_ARRAY);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1586	alias_cpa.curpage = 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1587
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1588	cpa->force_flush_all = 1;
				1589
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1590	ret = __change_page_attr_set_clr(&alias_cpa, 0);
				1591	if (ret)
				1592	return ret;
				1593	}
				1594
				1595	#ifdef CONFIG_X86_64
				1596	/*
				1597	* If the primary call didn't touch the high mapping already
				1598	* and the physical address is inside the kernel map, we need
				1599	* to touch the high mapped kernel as well:
				1600	*/
				1601	if (!within(vaddr, (unsigned long)_text, _brk_end) &&
				1602	__cpa_pfn_in_highmap(cpa->pfn)) {
				1603	unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
				1604	__START_KERNEL_map - phys_base;
				1605	alias_cpa = *cpa;
				1606	alias_cpa.vaddr = &temp_cpa_vaddr;
				1607	alias_cpa.flags &= ~(CPA_PAGES_ARRAY \| CPA_ARRAY);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1608	alias_cpa.curpage = 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1609
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1610	cpa->force_flush_all = 1;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1611	/*
				1612	* The high mapping range is imprecise, so ignore the
				1613	* return value.
				1614	*/
				1615	__change_page_attr_set_clr(&alias_cpa, 0);
				1616	}
				1617	#endif
				1618
				1619	return 0;
				1620	}
				1621
				1622	static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
				1623	{
				1624	unsigned long numpages = cpa->numpages;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1625	unsigned long rempages = numpages;
				1626	int ret = 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1627
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1628	while (rempages) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1629	/*
				1630	* Store the remaining nr of pages for the large page
				1631	* preservation check.
				1632	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1633	cpa->numpages = rempages;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1634	/* for array changes, we can't use large page */
				1635	if (cpa->flags & (CPA_ARRAY \| CPA_PAGES_ARRAY))
				1636	cpa->numpages = 1;
				1637
				1638	if (!debug_pagealloc_enabled())
				1639	spin_lock(&cpa_lock);
				1640	ret = __change_page_attr(cpa, checkalias);
				1641	if (!debug_pagealloc_enabled())
				1642	spin_unlock(&cpa_lock);
				1643	if (ret)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1644	goto out;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1645
				1646	if (checkalias) {
				1647	ret = cpa_process_alias(cpa);
				1648	if (ret)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1649	goto out;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1650	}
				1651
				1652	/*
				1653	* Adjust the number of pages with the result of the
				1654	* CPA operation. Either a large page has been
				1655	* preserved or a single page update happened.
				1656	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1657	BUG_ON(cpa->numpages > rempages \|\| !cpa->numpages);
				1658	rempages -= cpa->numpages;
				1659	cpa->curpage += cpa->numpages;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1660	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1661
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1662	out:
				1663	/* Restore the original numpages */
				1664	cpa->numpages = numpages;
				1665	return ret;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1666	}
				1667
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1668	static int change_page_attr_set_clr(unsigned long *addr, int numpages,
				1669	pgprot_t mask_set, pgprot_t mask_clr,
				1670	int force_split, int in_flag,
				1671	struct page **pages)
				1672	{
				1673	struct cpa_data cpa;
				1674	int ret, cache, checkalias;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1675
				1676	memset(&cpa, 0, sizeof(cpa));
				1677
				1678	/*
				1679	* Check, if we are requested to set a not supported
				1680	* feature. Clearing non-supported features is OK.
				1681	*/
				1682	mask_set = canon_pgprot(mask_set);
				1683
				1684	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
				1685	return 0;
				1686
				1687	/* Ensure we are PAGE_SIZE aligned */
				1688	if (in_flag & CPA_ARRAY) {
				1689	int i;
				1690	for (i = 0; i < numpages; i++) {
				1691	if (addr[i] & ~PAGE_MASK) {
				1692	addr[i] &= PAGE_MASK;
				1693	WARN_ON_ONCE(1);
				1694	}
				1695	}
				1696	} else if (!(in_flag & CPA_PAGES_ARRAY)) {
				1697	/*
				1698	* in_flag of CPA_PAGES_ARRAY implies it is aligned.
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1699	* No need to check in that case
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1700	*/
				1701	if (*addr & ~PAGE_MASK) {
				1702	*addr &= PAGE_MASK;
				1703	/*
				1704	* People should not be passing in unaligned addresses:
				1705	*/
				1706	WARN_ON_ONCE(1);
				1707	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1708	}
				1709
				1710	/* Must avoid aliasing mappings in the highmem code */
				1711	kmap_flush_unused();
				1712
				1713	vm_unmap_aliases();
				1714
				1715	cpa.vaddr = addr;
				1716	cpa.pages = pages;
				1717	cpa.numpages = numpages;
				1718	cpa.mask_set = mask_set;
				1719	cpa.mask_clr = mask_clr;
				1720	cpa.flags = 0;
				1721	cpa.curpage = 0;
				1722	cpa.force_split = force_split;
				1723
				1724	if (in_flag & (CPA_ARRAY \| CPA_PAGES_ARRAY))
				1725	cpa.flags \|= in_flag;
				1726
				1727	/* No alias checking for _NX bit modifications */
				1728	checkalias = (pgprot_val(mask_set) \| pgprot_val(mask_clr)) != _PAGE_NX;
				1729	/* Has caller explicitly disabled alias checking? */
				1730	if (in_flag & CPA_NO_CHECK_ALIAS)
				1731	checkalias = 0;
				1732
				1733	ret = __change_page_attr_set_clr(&cpa, checkalias);
				1734
				1735	/*
				1736	* Check whether we really changed something:
				1737	*/
				1738	if (!(cpa.flags & CPA_FLUSHTLB))
				1739	goto out;
				1740
				1741	/*
				1742	* No need to flush, when we did not set any of the caching
				1743	* attributes:
				1744	*/
				1745	cache = !!pgprot2cachemode(mask_set);
				1746
				1747	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1748	* On error; flush everything to be sure.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1749	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1750	if (ret) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1751	cpa_flush_all(cache);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1752	goto out;
				1753	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1754
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1755	cpa_flush(&cpa, cache);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1756	out:
				1757	return ret;
				1758	}
				1759
				1760	static inline int change_page_attr_set(unsigned long *addr, int numpages,
				1761	pgprot_t mask, int array)
				1762	{
				1763	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
				1764	(array ? CPA_ARRAY : 0), NULL);
				1765	}
				1766
				1767	static inline int change_page_attr_clear(unsigned long *addr, int numpages,
				1768	pgprot_t mask, int array)
				1769	{
				1770	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
				1771	(array ? CPA_ARRAY : 0), NULL);
				1772	}
				1773
				1774	static inline int cpa_set_pages_array(struct page **pages, int numpages,
				1775	pgprot_t mask)
				1776	{
				1777	return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
				1778	CPA_PAGES_ARRAY, pages);
				1779	}
				1780
				1781	static inline int cpa_clear_pages_array(struct page **pages, int numpages,
				1782	pgprot_t mask)
				1783	{
				1784	return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
				1785	CPA_PAGES_ARRAY, pages);
				1786	}
				1787
				1788	int _set_memory_uc(unsigned long addr, int numpages)
				1789	{
				1790	/*
				1791	* for now UC MINUS. see comments in ioremap_nocache()
				1792	* If you really need strong UC use ioremap_uc(), but note
				1793	* that you cannot override IO areas with set_memory_*() as
				1794	* these helpers cannot work with IO memory.
				1795	*/
				1796	return change_page_attr_set(&addr, numpages,
				1797	cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
				1798	0);
				1799	}
				1800
				1801	int set_memory_uc(unsigned long addr, int numpages)
				1802	{
				1803	int ret;
				1804
				1805	/*
				1806	* for now UC MINUS. see comments in ioremap_nocache()
				1807	*/
				1808	ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
				1809	_PAGE_CACHE_MODE_UC_MINUS, NULL);
				1810	if (ret)
				1811	goto out_err;
				1812
				1813	ret = _set_memory_uc(addr, numpages);
				1814	if (ret)
				1815	goto out_free;
				1816
				1817	return 0;
				1818
				1819	out_free:
				1820	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
				1821	out_err:
				1822	return ret;
				1823	}
				1824	EXPORT_SYMBOL(set_memory_uc);
				1825
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1826	int _set_memory_wc(unsigned long addr, int numpages)
				1827	{
				1828	int ret;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1829
				1830	ret = change_page_attr_set(&addr, numpages,
				1831	cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
				1832	0);
				1833	if (!ret) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1834	ret = change_page_attr_set_clr(&addr, numpages,
				1835	cachemode2pgprot(_PAGE_CACHE_MODE_WC),
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1836	__pgprot(_PAGE_CACHE_MASK),
				1837	0, 0, NULL);
				1838	}
				1839	return ret;
				1840	}
				1841
				1842	int set_memory_wc(unsigned long addr, int numpages)
				1843	{
				1844	int ret;
				1845
				1846	ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
				1847	_PAGE_CACHE_MODE_WC, NULL);
				1848	if (ret)
				1849	return ret;
				1850
				1851	ret = _set_memory_wc(addr, numpages);
				1852	if (ret)
				1853	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
				1854
				1855	return ret;
				1856	}
				1857	EXPORT_SYMBOL(set_memory_wc);
				1858
				1859	int _set_memory_wt(unsigned long addr, int numpages)
				1860	{
				1861	return change_page_attr_set(&addr, numpages,
				1862	cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0);
				1863	}
				1864
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1865	int _set_memory_wb(unsigned long addr, int numpages)
				1866	{
				1867	/* WB cache mode is hard wired to all cache attribute bits being 0 */
				1868	return change_page_attr_clear(&addr, numpages,
				1869	__pgprot(_PAGE_CACHE_MASK), 0);
				1870	}
				1871
				1872	int set_memory_wb(unsigned long addr, int numpages)
				1873	{
				1874	int ret;
				1875
				1876	ret = _set_memory_wb(addr, numpages);
				1877	if (ret)
				1878	return ret;
				1879
				1880	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
				1881	return 0;
				1882	}
				1883	EXPORT_SYMBOL(set_memory_wb);
				1884
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1885	int set_memory_x(unsigned long addr, int numpages)
				1886	{
				1887	if (!(__supported_pte_mask & _PAGE_NX))
				1888	return 0;
				1889
				1890	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
				1891	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1892
				1893	int set_memory_nx(unsigned long addr, int numpages)
				1894	{
				1895	if (!(__supported_pte_mask & _PAGE_NX))
				1896	return 0;
				1897
				1898	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
				1899	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1900
				1901	int set_memory_ro(unsigned long addr, int numpages)
				1902	{
				1903	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
				1904	}
				1905
				1906	int set_memory_rw(unsigned long addr, int numpages)
				1907	{
				1908	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
				1909	}
				1910
				1911	int set_memory_np(unsigned long addr, int numpages)
				1912	{
				1913	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
				1914	}
				1915
				1916	int set_memory_np_noalias(unsigned long addr, int numpages)
				1917	{
				1918	int cpa_flags = CPA_NO_CHECK_ALIAS;
				1919
				1920	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
				1921	__pgprot(_PAGE_PRESENT), 0,
				1922	cpa_flags, NULL);
				1923	}
				1924
				1925	int set_memory_4k(unsigned long addr, int numpages)
				1926	{
				1927	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
				1928	__pgprot(0), 1, 0, NULL);
				1929	}
				1930
				1931	int set_memory_nonglobal(unsigned long addr, int numpages)
				1932	{
				1933	return change_page_attr_clear(&addr, numpages,
				1934	__pgprot(_PAGE_GLOBAL), 0);
				1935	}
				1936
				1937	int set_memory_global(unsigned long addr, int numpages)
				1938	{
				1939	return change_page_attr_set(&addr, numpages,
				1940	__pgprot(_PAGE_GLOBAL), 0);
				1941	}
				1942
				1943	static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
				1944	{
				1945	struct cpa_data cpa;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1946	int ret;
				1947
				1948	/* Nothing to do if memory encryption is not active */
				1949	if (!mem_encrypt_active())
				1950	return 0;
				1951
				1952	/* Should not be working on unaligned addresses */
				1953	if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr))
				1954	addr &= PAGE_MASK;
				1955
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1956	memset(&cpa, 0, sizeof(cpa));
				1957	cpa.vaddr = &addr;
				1958	cpa.numpages = numpages;
				1959	cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0);
				1960	cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC);
				1961	cpa.pgd = init_mm.pgd;
				1962
				1963	/* Must avoid aliasing mappings in the highmem code */
				1964	kmap_flush_unused();
				1965	vm_unmap_aliases();
				1966
				1967	/*
				1968	* Before changing the encryption attribute, we need to flush caches.
				1969	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1970	cpa_flush(&cpa, 1);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1971
				1972	ret = __change_page_attr_set_clr(&cpa, 1);
				1973
				1974	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1975	* After changing the encryption attribute, we need to flush TLBs again
				1976	* in case any speculative TLB caching occurred (but no need to flush
				1977	* caches again). We could just use cpa_flush_all(), but in case TLB
				1978	* flushing gets optimized in the cpa_flush() path use the same logic
				1979	* as above.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1980	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1981	cpa_flush(&cpa, 0);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1982
				1983	return ret;
				1984	}
				1985
				1986	int set_memory_encrypted(unsigned long addr, int numpages)
				1987	{
				1988	return __set_memory_enc_dec(addr, numpages, true);
				1989	}
				1990	EXPORT_SYMBOL_GPL(set_memory_encrypted);
				1991
				1992	int set_memory_decrypted(unsigned long addr, int numpages)
				1993	{
				1994	return __set_memory_enc_dec(addr, numpages, false);
				1995	}
				1996	EXPORT_SYMBOL_GPL(set_memory_decrypted);
				1997
				1998	int set_pages_uc(struct page *page, int numpages)
				1999	{
				2000	unsigned long addr = (unsigned long)page_address(page);
				2001
				2002	return set_memory_uc(addr, numpages);
				2003	}
				2004	EXPORT_SYMBOL(set_pages_uc);
				2005
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2006	static int _set_pages_array(struct page **pages, int numpages,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2007	enum page_cache_mode new_type)
				2008	{
				2009	unsigned long start;
				2010	unsigned long end;
				2011	enum page_cache_mode set_type;
				2012	int i;
				2013	int free_idx;
				2014	int ret;
				2015
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2016	for (i = 0; i < numpages; i++) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2017	if (PageHighMem(pages[i]))
				2018	continue;
				2019	start = page_to_pfn(pages[i]) << PAGE_SHIFT;
				2020	end = start + PAGE_SIZE;
				2021	if (reserve_memtype(start, end, new_type, NULL))
				2022	goto err_out;
				2023	}
				2024
				2025	/* If WC, set to UC- first and then WC */
				2026	set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
				2027	_PAGE_CACHE_MODE_UC_MINUS : new_type;
				2028
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2029	ret = cpa_set_pages_array(pages, numpages,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2030	cachemode2pgprot(set_type));
				2031	if (!ret && new_type == _PAGE_CACHE_MODE_WC)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2032	ret = change_page_attr_set_clr(NULL, numpages,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2033	cachemode2pgprot(
				2034	_PAGE_CACHE_MODE_WC),
				2035	__pgprot(_PAGE_CACHE_MASK),
				2036	0, CPA_PAGES_ARRAY, pages);
				2037	if (ret)
				2038	goto err_out;
				2039	return 0; /* Success */
				2040	err_out:
				2041	free_idx = i;
				2042	for (i = 0; i < free_idx; i++) {
				2043	if (PageHighMem(pages[i]))
				2044	continue;
				2045	start = page_to_pfn(pages[i]) << PAGE_SHIFT;
				2046	end = start + PAGE_SIZE;
				2047	free_memtype(start, end);
				2048	}
				2049	return -EINVAL;
				2050	}
				2051
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2052	int set_pages_array_uc(struct page **pages, int numpages)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2053	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2054	return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_UC_MINUS);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2055	}
				2056	EXPORT_SYMBOL(set_pages_array_uc);
				2057
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2058	int set_pages_array_wc(struct page **pages, int numpages)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2059	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2060	return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WC);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2061	}
				2062	EXPORT_SYMBOL(set_pages_array_wc);
				2063
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2064	int set_pages_array_wt(struct page **pages, int numpages)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2065	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2066	return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2067	}
				2068	EXPORT_SYMBOL_GPL(set_pages_array_wt);
				2069
				2070	int set_pages_wb(struct page *page, int numpages)
				2071	{
				2072	unsigned long addr = (unsigned long)page_address(page);
				2073
				2074	return set_memory_wb(addr, numpages);
				2075	}
				2076	EXPORT_SYMBOL(set_pages_wb);
				2077
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2078	int set_pages_array_wb(struct page **pages, int numpages)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2079	{
				2080	int retval;
				2081	unsigned long start;
				2082	unsigned long end;
				2083	int i;
				2084
				2085	/* WB cache mode is hard wired to all cache attribute bits being 0 */
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2086	retval = cpa_clear_pages_array(pages, numpages,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2087	__pgprot(_PAGE_CACHE_MASK));
				2088	if (retval)
				2089	return retval;
				2090
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2091	for (i = 0; i < numpages; i++) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2092	if (PageHighMem(pages[i]))
				2093	continue;
				2094	start = page_to_pfn(pages[i]) << PAGE_SHIFT;
				2095	end = start + PAGE_SIZE;
				2096	free_memtype(start, end);
				2097	}
				2098
				2099	return 0;
				2100	}
				2101	EXPORT_SYMBOL(set_pages_array_wb);
				2102
/*
 * struct page flavour of set_memory_ro(): operates on @numpages pages
 * starting at the kernel address of @page.
 */
int set_pages_ro(struct page *page, int numpages)
{
	return set_memory_ro((unsigned long)page_address(page), numpages);
}
				2109
				2110	int set_pages_rw(struct page *page, int numpages)
				2111	{
				2112	unsigned long addr = (unsigned long)page_address(page);
				2113
				2114	return set_memory_rw(addr, numpages);
				2115	}
				2116
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2117	static int __set_pages_p(struct page *page, int numpages)
				2118	{
				2119	unsigned long tempaddr = (unsigned long) page_address(page);
				2120	struct cpa_data cpa = { .vaddr = &tempaddr,
				2121	.pgd = NULL,
				2122	.numpages = numpages,
				2123	.mask_set = __pgprot(_PAGE_PRESENT \| _PAGE_RW),
				2124	.mask_clr = __pgprot(0),
				2125	.flags = 0};
				2126
				2127	/*
				2128	* No alias checking needed for setting present flag. otherwise,
				2129	* we may need to break large pages for 64-bit kernel text
				2130	* mappings (this adds to complexity if we want to do this from
				2131	* atomic context especially). Let's keep it simple!
				2132	*/
				2133	return __change_page_attr_set_clr(&cpa, 0);
				2134	}
				2135
				2136	static int __set_pages_np(struct page *page, int numpages)
				2137	{
				2138	unsigned long tempaddr = (unsigned long) page_address(page);
				2139	struct cpa_data cpa = { .vaddr = &tempaddr,
				2140	.pgd = NULL,
				2141	.numpages = numpages,
				2142	.mask_set = __pgprot(0),
				2143	.mask_clr = __pgprot(_PAGE_PRESENT \| _PAGE_RW),
				2144	.flags = 0};
				2145
				2146	/*
				2147	* No alias checking needed for setting not present flag. otherwise,
				2148	* we may need to break large pages for 64-bit kernel text
				2149	* mappings (this adds to complexity if we want to do this from
				2150	* atomic context especially). Let's keep it simple!
				2151	*/
				2152	return __change_page_attr_set_clr(&cpa, 0);
				2153	}
				2154
/*
 * Clear present/RW for the single page @page via __set_pages_np().
 * As the name says, no flush is performed.
 */
int set_direct_map_invalid_noflush(struct page *page)
{
	const int one_page = 1;

	return __set_pages_np(page, one_page);
}
				2159
				2160	int set_direct_map_default_noflush(struct page *page)
				2161	{
				2162	return __set_pages_p(page, 1);
				2163	}
				2164
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2165	void __kernel_map_pages(struct page *page, int numpages, int enable)
				2166	{
				2167	if (PageHighMem(page))
				2168	return;
				2169	if (!enable) {
				2170	debug_check_no_locks_freed(page_address(page),
				2171	numpages * PAGE_SIZE);
				2172	}
				2173
				2174	/*
				2175	* The return value is ignored as the calls cannot fail.
				2176	* Large pages for identity mappings are not used at boot time
				2177	* and hence no memory allocations during large page split.
				2178	*/
				2179	if (enable)
				2180	__set_pages_p(page, numpages);
				2181	else
				2182	__set_pages_np(page, numpages);
				2183
				2184	/*
				2185	* We should perform an IPI and flush all tlbs,
				2186	* but that can deadlock->flush only current cpu.
				2187	* Preemption needs to be disabled around __flush_tlb_all() due to
				2188	* CR3 reload in __native_flush_tlb().
				2189	*/
				2190	preempt_disable();
				2191	__flush_tlb_all();
				2192	preempt_enable();
				2193
				2194	arch_flush_lazy_mmu_mode();
				2195	}
				2196
				2197	#ifdef CONFIG_HIBERNATION
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2198	bool kernel_page_present(struct page *page)
				2199	{
				2200	unsigned int level;
				2201	pte_t *pte;
				2202
				2203	if (PageHighMem(page))
				2204	return false;
				2205
				2206	pte = lookup_address((unsigned long)page_address(page), &level);
				2207	return (pte_val(*pte) & _PAGE_PRESENT);
				2208	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2209	#endif /* CONFIG_HIBERNATION */
				2210
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2211	int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
				2212	unsigned numpages, unsigned long page_flags)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2213	{
				2214	int retval = -EINVAL;
				2215
				2216	struct cpa_data cpa = {
				2217	.vaddr = &address,
				2218	.pfn = pfn,
				2219	.pgd = pgd,
				2220	.numpages = numpages,
				2221	.mask_set = __pgprot(0),
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	2222	.mask_clr = __pgprot(~page_flags & (_PAGE_NX\|_PAGE_RW)),
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2223	.flags = 0,
				2224	};
				2225
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2226	WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
				2227
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2228	if (!(__supported_pte_mask & _PAGE_NX))
				2229	goto out;
				2230
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2231	if (!(page_flags & _PAGE_ENC))
				2232	cpa.mask_clr = pgprot_encrypted(cpa.mask_clr);
				2233
				2234	cpa.mask_set = __pgprot(_PAGE_PRESENT \| page_flags);
				2235
				2236	retval = __change_page_attr_set_clr(&cpa, 0);
				2237	__flush_tlb_all();
				2238
				2239	out:
				2240	return retval;
				2241	}
				2242
				2243	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2244	* __flush_tlb_all() flushes mappings only on current CPU and hence this
				2245	* function shouldn't be used in an SMP environment. Presently, it's used only
				2246	* during boot (way before smp_init()) by EFI subsystem and hence is ok.
				2247	*/
				2248	int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
				2249	unsigned long numpages)
				2250	{
				2251	int retval;
				2252
				2253	/*
				2254	* The typical sequence for unmapping is to find a pte through
				2255	* lookup_address_in_pgd() (ideally, it should never return NULL because
				2256	* the address is already mapped) and change it's protections. As pfn is
				2257	* the target of a mapping, it's not useful while unmapping.
				2258	*/
				2259	struct cpa_data cpa = {
				2260	.vaddr = &address,
				2261	.pfn = 0,
				2262	.pgd = pgd,
				2263	.numpages = numpages,
				2264	.mask_set = __pgprot(0),
				2265	.mask_clr = __pgprot(_PAGE_PRESENT \| _PAGE_RW),
				2266	.flags = 0,
				2267	};
				2268
				2269	WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
				2270
				2271	retval = __change_page_attr_set_clr(&cpa, 0);
				2272	__flush_tlb_all();
				2273
				2274	return retval;
				2275	}
				2276
				2277	/*
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2278	* The testcases use internal knowledge of the implementation that shouldn't
				2279	* be exposed to the rest of the kernel. Include these directly here.
				2280	*/
				2281	#ifdef CONFIG_CPA_DEBUG
				2282	#include "pageattr-test.c"
				2283	#endif