Blame - arch/x86/kernel/kvm.c - hafnium/third_party/linux.git

blob: d9b71924c23c9b939986b2d2ab153ba15b7077c3 [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1	/*
				2	* KVM paravirt_ops implementation
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License as published by
				6	* the Free Software Foundation; either version 2 of the License, or
				7	* (at your option) any later version.
				8	*
				9	* This program is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write to the Free Software
				16	* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
				17	*
				18	* Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
				19	* Copyright IBM Corporation, 2007
				20	* Authors: Anthony Liguori <aliguori@us.ibm.com>
				21	*/
				22
				23	#include <linux/context_tracking.h>
				24	#include <linux/init.h>
				25	#include <linux/kernel.h>
				26	#include <linux/kvm_para.h>
				27	#include <linux/cpu.h>
				28	#include <linux/mm.h>
				29	#include <linux/highmem.h>
				30	#include <linux/hardirq.h>
				31	#include <linux/notifier.h>
				32	#include <linux/reboot.h>
				33	#include <linux/hash.h>
				34	#include <linux/sched.h>
				35	#include <linux/slab.h>
				36	#include <linux/kprobes.h>
				37	#include <linux/debugfs.h>
				38	#include <linux/nmi.h>
				39	#include <linux/swait.h>
				40	#include <asm/timer.h>
				41	#include <asm/cpu.h>
				42	#include <asm/traps.h>
				43	#include <asm/desc.h>
				44	#include <asm/tlbflush.h>
				45	#include <asm/apic.h>
				46	#include <asm/apicdef.h>
				47	#include <asm/hypervisor.h>
				48	#include <asm/tlb.h>
				49
				50	static int kvmapf = 1;
				51
				52	static int __init parse_no_kvmapf(char *arg)
				53	{
				54	kvmapf = 0;
				55	return 0;
				56	}
				57
				58	early_param("no-kvmapf", parse_no_kvmapf);
				59
				60	static int steal_acc = 1;
				61	static int __init parse_no_stealacc(char *arg)
				62	{
				63	steal_acc = 0;
				64	return 0;
				65	}
				66
				67	early_param("no-steal-acc", parse_no_stealacc);
				68
				69	static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
				70	static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64);
				71	static int has_steal_clock = 0;
				72
				/*
				 * io_delay hook used under KVM: deliberately empty, since no "IO delay"
				 * is needed on KVM.
				 */
				static void kvm_io_delay(void)
				{
				}
				79
				80	#define KVM_TASK_SLEEP_HASHBITS 8
				81	#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)
				82
				83	struct kvm_task_sleep_node {
				84	struct hlist_node link;
				85	struct swait_queue_head wq;
				86	u32 token;
				87	int cpu;
				88	bool halted;
				89	};
				90
				91	static struct kvm_task_sleep_head {
				92	raw_spinlock_t lock;
				93	struct hlist_head list;
				94	} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
				95
				96	static struct kvm_task_sleep_node _find_apf_task(struct kvm_task_sleep_head b,
				97	u32 token)
				98	{
				99	struct hlist_node *p;
				100
				101	hlist_for_each(p, &b->list) {
				102	struct kvm_task_sleep_node *n =
				103	hlist_entry(p, typeof(*n), link);
				104	if (n->token == token)
				105	return n;
				106	}
				107
				108	return NULL;
				109	}
				110
				111	/*
				112	* @interrupt_kernel: Is this called from a routine which interrupts the kernel
				113	* (other than user space)?
				114	*/
				115	void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
				116	{
				117	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
				118	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
				119	struct kvm_task_sleep_node n, *e;
				120	DECLARE_SWAITQUEUE(wait);
				121
				122	rcu_irq_enter();
				123
				124	raw_spin_lock(&b->lock);
				125	e = _find_apf_task(b, token);
				126	if (e) {
				127	/* dummy entry exist -> wake up was delivered ahead of PF */
				128	hlist_del(&e->link);
				129	kfree(e);
				130	raw_spin_unlock(&b->lock);
				131
				132	rcu_irq_exit();
				133	return;
				134	}
				135
				136	n.token = token;
				137	n.cpu = smp_processor_id();
				138	n.halted = is_idle_task(current) \|\|
				139	(IS_ENABLED(CONFIG_PREEMPT_COUNT)
				140	? preempt_count() > 1 \|\| rcu_preempt_depth()
				141	: interrupt_kernel);
				142	init_swait_queue_head(&n.wq);
				143	hlist_add_head(&n.link, &b->list);
				144	raw_spin_unlock(&b->lock);
				145
				146	for (;;) {
				147	if (!n.halted)
				148	prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
				149	if (hlist_unhashed(&n.link))
				150	break;
				151
				152	rcu_irq_exit();
				153
				154	if (!n.halted) {
				155	local_irq_enable();
				156	schedule();
				157	local_irq_disable();
				158	} else {
				159	/*
				160	* We cannot reschedule. So halt.
				161	*/
				162	native_safe_halt();
				163	local_irq_disable();
				164	}
				165
				166	rcu_irq_enter();
				167	}
				168	if (!n.halted)
				169	finish_swait(&n.wq, &wait);
				170
				171	rcu_irq_exit();
				172	return;
				173	}
				174	EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
				175
				176	static void apf_task_wake_one(struct kvm_task_sleep_node *n)
				177	{
				178	hlist_del_init(&n->link);
				179	if (n->halted)
				180	smp_send_reschedule(n->cpu);
				181	else if (swq_has_sleeper(&n->wq))
				182	swake_up_one(&n->wq);
				183	}
				184
				185	static void apf_task_wake_all(void)
				186	{
				187	int i;
				188
				189	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
				190	struct hlist_node p, next;
				191	struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
				192	raw_spin_lock(&b->lock);
				193	hlist_for_each_safe(p, next, &b->list) {
				194	struct kvm_task_sleep_node *n =
				195	hlist_entry(p, typeof(*n), link);
				196	if (n->cpu == smp_processor_id())
				197	apf_task_wake_one(n);
				198	}
				199	raw_spin_unlock(&b->lock);
				200	}
				201	}
				202
				203	void kvm_async_pf_task_wake(u32 token)
				204	{
				205	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
				206	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
				207	struct kvm_task_sleep_node *n;
				208
				209	if (token == ~0) {
				210	apf_task_wake_all();
				211	return;
				212	}
				213
				214	again:
				215	raw_spin_lock(&b->lock);
				216	n = _find_apf_task(b, token);
				217	if (!n) {
				218	/*
				219	* async PF was not yet handled.
				220	* Add dummy entry for the token.
				221	*/
				222	n = kzalloc(sizeof(*n), GFP_ATOMIC);
				223	if (!n) {
				224	/*
				225	* Allocation failed! Busy wait while other cpu
				226	* handles async PF.
				227	*/
				228	raw_spin_unlock(&b->lock);
				229	cpu_relax();
				230	goto again;
				231	}
				232	n->token = token;
				233	n->cpu = smp_processor_id();
				234	init_swait_queue_head(&n->wq);
				235	hlist_add_head(&n->link, &b->list);
				236	} else
				237	apf_task_wake_one(n);
				238	raw_spin_unlock(&b->lock);
				239	return;
				240	}
				241	EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
				242
				243	u32 kvm_read_and_reset_pf_reason(void)
				244	{
				245	u32 reason = 0;
				246
				247	if (__this_cpu_read(apf_reason.enabled)) {
				248	reason = __this_cpu_read(apf_reason.reason);
				249	__this_cpu_write(apf_reason.reason, 0);
				250	}
				251
				252	return reason;
				253	}
				254	EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
				255	NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
				256
				257	dotraplinkage void
				258	do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
				259	{
				260	enum ctx_state prev_state;
				261
				262	switch (kvm_read_and_reset_pf_reason()) {
				263	default:
				264	do_page_fault(regs, error_code);
				265	break;
				266	case KVM_PV_REASON_PAGE_NOT_PRESENT:
				267	/* page is swapped out by the host. */
				268	prev_state = exception_enter();
				269	kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
				270	exception_exit(prev_state);
				271	break;
				272	case KVM_PV_REASON_PAGE_READY:
				273	rcu_irq_enter();
				274	kvm_async_pf_task_wake((u32)read_cr2());
				275	rcu_irq_exit();
				276	break;
				277	}
				278	}
				279	NOKPROBE_SYMBOL(do_async_page_fault);
				280
				281	static void __init paravirt_ops_setup(void)
				282	{
				283	pv_info.name = "KVM";
				284
				285	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
				286	pv_cpu_ops.io_delay = kvm_io_delay;
				287
				288	#ifdef CONFIG_X86_IO_APIC
				289	no_timer_check = 1;
				290	#endif
				291	}
				292
				293	static void kvm_register_steal_time(void)
				294	{
				295	int cpu = smp_processor_id();
				296	struct kvm_steal_time *st = &per_cpu(steal_time, cpu);
				297
				298	if (!has_steal_clock)
				299	return;
				300
				301	wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) \| KVM_MSR_ENABLED));
				302	pr_info("kvm-stealtime: cpu %d, msr %llx\n",
				303	cpu, (unsigned long long) slow_virt_to_phys(st));
				304	}
				305
				306	static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
				307
				308	static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
				309	{
				310	/**
				311	* This relies on __test_and_clear_bit to modify the memory
				312	* in a way that is atomic with respect to the local CPU.
				313	* The hypervisor only accesses this memory from the local CPU so
				314	* there's no need for lock or memory barriers.
				315	* An optimization barrier is implied in apic write.
				316	*/
				317	if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi)))
				318	return;
				319	apic->native_eoi_write(APIC_EOI, APIC_EOI_ACK);
				320	}
				321
				322	static void kvm_guest_cpu_init(void)
				323	{
				324	if (!kvm_para_available())
				325	return;
				326
				327	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
				328	u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
				329
				330	#ifdef CONFIG_PREEMPT
				331	pa \|= KVM_ASYNC_PF_SEND_ALWAYS;
				332	#endif
				333	pa \|= KVM_ASYNC_PF_ENABLED;
				334
				335	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
				336	pa \|= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
				337
				338	wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
				339	__this_cpu_write(apf_reason.enabled, 1);
				340	printk(KERN_INFO"KVM setup async PF for cpu %d\n",
				341	smp_processor_id());
				342	}
				343
				344	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
				345	unsigned long pa;
				346	/* Size alignment is implied but just to make it explicit. */
				347	BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
				348	__this_cpu_write(kvm_apic_eoi, 0);
				349	pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi))
				350	\| KVM_MSR_ENABLED;
				351	wrmsrl(MSR_KVM_PV_EOI_EN, pa);
				352	}
				353
				354	if (has_steal_clock)
				355	kvm_register_steal_time();
				356	}
				357
				358	static void kvm_pv_disable_apf(void)
				359	{
				360	if (!__this_cpu_read(apf_reason.enabled))
				361	return;
				362
				363	wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
				364	__this_cpu_write(apf_reason.enabled, 0);
				365
				366	printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
				367	smp_processor_id());
				368	}
				369
				370	static void kvm_pv_guest_cpu_reboot(void *unused)
				371	{
				372	/*
				373	* We disable PV EOI before we load a new kernel by kexec,
				374	* since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
				375	* New kernel can re-enable when it boots.
				376	*/
				377	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
				378	wrmsrl(MSR_KVM_PV_EOI_EN, 0);
				379	kvm_pv_disable_apf();
				380	kvm_disable_steal_time();
				381	}
				382
				383	static int kvm_pv_reboot_notify(struct notifier_block *nb,
				384	unsigned long code, void *unused)
				385	{
				386	if (code == SYS_RESTART)
				387	on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
				388	return NOTIFY_DONE;
				389	}
				390
				391	static struct notifier_block kvm_pv_reboot_nb = {
				392	.notifier_call = kvm_pv_reboot_notify,
				393	};
				394
				395	static u64 kvm_steal_clock(int cpu)
				396	{
				397	u64 steal;
				398	struct kvm_steal_time *src;
				399	int version;
				400
				401	src = &per_cpu(steal_time, cpu);
				402	do {
				403	version = src->version;
				404	virt_rmb();
				405	steal = src->steal;
				406	virt_rmb();
				407	} while ((version & 1) \|\| (version != src->version));
				408
				409	return steal;
				410	}
				411
				412	void kvm_disable_steal_time(void)
				413	{
				414	if (!has_steal_clock)
				415	return;
				416
				417	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
				418	}
				419
				/*
				 * Mark @size bytes starting at @ptr as decrypted by forwarding to
				 * early_set_memory_decrypted().
				 */
				static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
				{
					unsigned long vaddr = (unsigned long) ptr;

					early_set_memory_decrypted(vaddr, size);
				}
				424
				425	/*
				426	* Iterate through all possible CPUs and map the memory region pointed
				427	* by apf_reason, steal_time and kvm_apic_eoi as decrypted at once.
				428	*
				429	* Note: we iterate through all possible CPUs to ensure that CPUs
				430	* hotplugged will have their per-cpu variable already mapped as
				431	* decrypted.
				432	*/
				433	static void __init sev_map_percpu_data(void)
				434	{
				435	int cpu;
				436
				437	if (!sev_active())
				438	return;
				439
				440	for_each_possible_cpu(cpu) {
				441	__set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason));
				442	__set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));
				443	__set_percpu_decrypted(&per_cpu(kvm_apic_eoi, cpu), sizeof(kvm_apic_eoi));
				444	}
				445	}
				446
				447	#ifdef CONFIG_SMP
				448	#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
				449
				450	static void __send_ipi_mask(const struct cpumask *mask, int vector)
				451	{
				452	unsigned long flags;
				453	int cpu, apic_id, icr;
				454	int min = 0, max = 0;
				455	#ifdef CONFIG_X86_64
				456	__uint128_t ipi_bitmap = 0;
				457	#else
				458	u64 ipi_bitmap = 0;
				459	#endif
				460
				461	if (cpumask_empty(mask))
				462	return;
				463
				464	local_irq_save(flags);
				465
				466	switch (vector) {
				467	default:
				468	icr = APIC_DM_FIXED \| vector;
				469	break;
				470	case NMI_VECTOR:
				471	icr = APIC_DM_NMI;
				472	break;
				473	}
				474
				475	for_each_cpu(cpu, mask) {
				476	apic_id = per_cpu(x86_cpu_to_apicid, cpu);
				477	if (!ipi_bitmap) {
				478	min = max = apic_id;
				479	} else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
				480	ipi_bitmap <<= min - apic_id;
				481	min = apic_id;
				482	} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
				483	max = apic_id < max ? max : apic_id;
				484	} else {
				485	kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
				486	(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
				487	min = max = apic_id;
				488	ipi_bitmap = 0;
				489	}
				490	__set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
				491	}
				492
				493	if (ipi_bitmap) {
				494	kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
				495	(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
				496	}
				497
				498	local_irq_restore(flags);
				499	}
				500
				/* apic->send_IPI_mask hook: forwards @mask and @vector to __send_ipi_mask(). */
				static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
				{
					__send_ipi_mask(mask, vector);
				}
				505
				506	static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
				507	{
				508	unsigned int this_cpu = smp_processor_id();
				509	struct cpumask new_mask;
				510	const struct cpumask *local_mask;
				511
				512	cpumask_copy(&new_mask, mask);
				513	cpumask_clear_cpu(this_cpu, &new_mask);
				514	local_mask = &new_mask;
				515	__send_ipi_mask(local_mask, vector);
				516	}
				517
				518	static void kvm_send_ipi_allbutself(int vector)
				519	{
				520	kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
				521	}
				522
				523	static void kvm_send_ipi_all(int vector)
				524	{
				525	__send_ipi_mask(cpu_online_mask, vector);
				526	}
				527
				528	/*
				529	* Set the IPI entry points
				530	*/
				531	static void kvm_setup_pv_ipi(void)
				532	{
				533	apic->send_IPI_mask = kvm_send_ipi_mask;
				534	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
				535	apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
				536	apic->send_IPI_all = kvm_send_ipi_all;
				537	pr_info("KVM setup pv IPIs\n");
				538	}
				539
				540	static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
				541	{
				542	native_smp_prepare_cpus(max_cpus);
				543	if (kvm_para_has_hint(KVM_HINTS_REALTIME))
				544	static_branch_disable(&virt_spin_lock_key);
				545	}
				546
				547	static void __init kvm_smp_prepare_boot_cpu(void)
				548	{
				549	/*
				550	* Map the per-cpu variables as decrypted before kvm_guest_cpu_init()
				551	* shares the guest physical address with the hypervisor.
				552	*/
				553	sev_map_percpu_data();
				554
				555	kvm_guest_cpu_init();
				556	native_smp_prepare_boot_cpu();
				557	kvm_spinlock_init();
				558	}
				559
				560	static void kvm_guest_cpu_offline(void)
				561	{
				562	kvm_disable_steal_time();
				563	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
				564	wrmsrl(MSR_KVM_PV_EOI_EN, 0);
				565	kvm_pv_disable_apf();
				566	apf_task_wake_all();
				567	}
				568
				/*
				 * CPU hotplug "online" callback: runs kvm_guest_cpu_init() with local
				 * interrupts disabled. @cpu is not used. Always returns 0.
				 */
				static int kvm_cpu_online(unsigned int cpu)
				{
					local_irq_disable();
					kvm_guest_cpu_init();
					local_irq_enable();

					return 0;
				}
				576
				/*
				 * CPU hotplug teardown callback: runs kvm_guest_cpu_offline() with local
				 * interrupts disabled. @cpu is not used. Always returns 0.
				 */
				static int kvm_cpu_down_prepare(unsigned int cpu)
				{
					local_irq_disable();
					kvm_guest_cpu_offline();
					local_irq_enable();

					return 0;
				}
				584	#endif
				585
				586	static void __init kvm_apf_trap_init(void)
				587	{
				588	update_intr_gate(X86_TRAP_PF, async_page_fault);
				589	}
				590
				591	static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask);
				592
				593	static void kvm_flush_tlb_others(const struct cpumask *cpumask,
				594	const struct flush_tlb_info *info)
				595	{
				596	u8 state;
				597	int cpu;
				598	struct kvm_steal_time *src;
				599	struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask);
				600
				601	cpumask_copy(flushmask, cpumask);
				602	/*
				603	* We have to call flush only on online vCPUs. And
				604	* queue flush_on_enter for pre-empted vCPUs
				605	*/
				606	for_each_cpu(cpu, flushmask) {
				607	src = &per_cpu(steal_time, cpu);
				608	state = READ_ONCE(src->preempted);
				609	if ((state & KVM_VCPU_PREEMPTED)) {
				610	if (try_cmpxchg(&src->preempted, &state,
				611	state \| KVM_VCPU_FLUSH_TLB))
				612	__cpumask_clear_cpu(cpu, flushmask);
				613	}
				614	}
				615
				616	native_flush_tlb_others(flushmask, info);
				617	}
				618
				619	static void __init kvm_guest_init(void)
				620	{
				621	int i;
				622
				623	if (!kvm_para_available())
				624	return;
				625
				626	paravirt_ops_setup();
				627	register_reboot_notifier(&kvm_pv_reboot_nb);
				628	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
				629	raw_spin_lock_init(&async_pf_sleepers[i].lock);
				630	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
				631	x86_init.irqs.trap_init = kvm_apf_trap_init;
				632
				633	if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
				634	has_steal_clock = 1;
				635	pv_time_ops.steal_clock = kvm_steal_clock;
				636	}
				637
				638	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
				639	!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
				640	kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
				641	pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
				642	pv_mmu_ops.tlb_remove_table = tlb_remove_table;
				643	}
				644
				645	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
				646	apic_set_eoi_write(kvm_guest_apic_eoi_write);
				647
				648	#ifdef CONFIG_SMP
				649	smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
				650	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
				651	if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
				652	kvm_cpu_online, kvm_cpu_down_prepare) < 0)
				653	pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
				654	#else
				655	sev_map_percpu_data();
				656	kvm_guest_cpu_init();
				657	#endif
				658
				659	/*
				660	* Hard lockup detection is enabled by default. Disable it, as guests
				661	* can get false positives too easily, for example if the host is
				662	* overcommitted.
				663	*/
				664	hardlockup_detector_disable();
				665	}
				666
				667	static noinline uint32_t __kvm_cpuid_base(void)
				668	{
				669	if (boot_cpu_data.cpuid_level < 0)
				670	return 0; /* So we don't blow up on old processors */
				671
				672	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
				673	return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
				674
				675	return 0;
				676	}
				677
				678	static inline uint32_t kvm_cpuid_base(void)
				679	{
				680	static int kvm_cpuid_base = -1;
				681
				682	if (kvm_cpuid_base == -1)
				683	kvm_cpuid_base = __kvm_cpuid_base();
				684
				685	return kvm_cpuid_base;
				686	}
				687
				688	bool kvm_para_available(void)
				689	{
				690	return kvm_cpuid_base() != 0;
				691	}
				692	EXPORT_SYMBOL_GPL(kvm_para_available);
				693
				694	unsigned int kvm_arch_para_features(void)
				695	{
				696	return cpuid_eax(kvm_cpuid_base() \| KVM_CPUID_FEATURES);
				697	}
				698
				699	unsigned int kvm_arch_para_hints(void)
				700	{
				701	return cpuid_edx(kvm_cpuid_base() \| KVM_CPUID_FEATURES);
				702	}
				703
				704	static uint32_t __init kvm_detect(void)
				705	{
				706	return kvm_cpuid_base();
				707	}
				708
				709	static void __init kvm_apic_init(void)
				710	{
				711	#if defined(CONFIG_SMP)
				712	if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))
				713	kvm_setup_pv_ipi();
				714	#endif
				715	}
				716
				717	static void __init kvm_init_platform(void)
				718	{
				719	kvmclock_init();
				720	x86_platform.apic_post_init = kvm_apic_init;
				721	}
				722
				723	const __initconst struct hypervisor_x86 x86_hyper_kvm = {
				724	.name = "KVM",
				725	.detect = kvm_detect,
				726	.type = X86_HYPER_KVM,
				727	.init.guest_late_init = kvm_guest_init,
				728	.init.x2apic_available = kvm_para_available,
				729	.init.init_platform = kvm_init_platform,
				730	};
				731
				732	static __init int activate_jump_labels(void)
				733	{
				734	if (has_steal_clock) {
				735	static_key_slow_inc(&paravirt_steal_enabled);
				736	if (steal_acc)
				737	static_key_slow_inc(&paravirt_steal_rq_enabled);
				738	}
				739
				740	return 0;
				741	}
				742	arch_initcall(activate_jump_labels);
				743
				744	static __init int kvm_setup_pv_tlb_flush(void)
				745	{
				746	int cpu;
				747
				748	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
				749	!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
				750	kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
				751	for_each_possible_cpu(cpu) {
				752	zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
				753	GFP_KERNEL, cpu_to_node(cpu));
				754	}
				755	pr_info("KVM setup pv remote TLB flush\n");
				756	}
				757
				758	return 0;
				759	}
				760	arch_initcall(kvm_setup_pv_tlb_flush);
				761
				762	#ifdef CONFIG_PARAVIRT_SPINLOCKS
				763
				764	/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
				765	static void kvm_kick_cpu(int cpu)
				766	{
				767	int apicid;
				768	unsigned long flags = 0;
				769
				770	apicid = per_cpu(x86_cpu_to_apicid, cpu);
				771	kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
				772	}
				773
				774	#include <asm/qspinlock.h>
				775
				776	static void kvm_wait(u8 *ptr, u8 val)
				777	{
				778	unsigned long flags;
				779
				780	if (in_nmi())
				781	return;
				782
				783	local_irq_save(flags);
				784
				785	if (READ_ONCE(*ptr) != val)
				786	goto out;
				787
				788	/*
				789	* halt until it's our turn and kicked. Note that we do safe halt
				790	* for irq enabled case to avoid hang when lock info is overwritten
				791	* in irq spinlock slowpath and no spurious interrupt occur to save us.
				792	*/
				793	if (arch_irqs_disabled_flags(flags))
				794	halt();
				795	else
				796	safe_halt();
				797
				798	out:
				799	local_irq_restore(flags);
				800	}
				801
				802	#ifdef CONFIG_X86_32
				803	__visible bool __kvm_vcpu_is_preempted(long cpu)
				804	{
				805	struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
				806
				807	return !!(src->preempted & KVM_VCPU_PREEMPTED);
				808	}
				809	PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);
				810
				811	#else
				812
				813	#include <asm/asm-offsets.h>
				814
				815	extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);
				816
				817	/*
				818	* Hand-optimized version for x86-64, written to avoid saving and restoring
				819	* eight 64-bit registers to/from the stack.
					*
					* The routine loads __per_cpu_offset[%rdi] into %rax, compares the byte at
					* steal_time + KVM_STEAL_TIME_preempted within that per-cpu area against
					* zero, and returns the "not equal" result in %al.
					*
					* NOTE(review): this tests the whole byte for non-zero, whereas the
					* CONFIG_X86_32 C variant masks with KVM_VCPU_PREEMPTED - confirm the
					* difference is intended.
				820	*/
				821	asm(
				822	".pushsection .text;"
				823	".global __raw_callee_save___kvm_vcpu_is_preempted;"
				824	".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
				825	"__raw_callee_save___kvm_vcpu_is_preempted:"
				826	"movq __per_cpu_offset(,%rdi,8), %rax;"
				827	"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
				828	"setne %al;"
				829	"ret;"
				830	".popsection");
				831
				832	#endif
				833
				834	/*
				835	* Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
				836	*/
				837	void __init kvm_spinlock_init(void)
				838	{
				839	if (!kvm_para_available())
				840	return;
				841	/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
				842	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
				843	return;
				844
				845	if (kvm_para_has_hint(KVM_HINTS_REALTIME))
				846	return;
				847
				848	/* Don't use the pvqspinlock code if there is only 1 vCPU. */
				849	if (num_possible_cpus() == 1)
				850	return;
				851
				852	__pv_init_lock_hash();
				853	pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
				854	pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
				855	pv_lock_ops.wait = kvm_wait;
				856	pv_lock_ops.kick = kvm_kick_cpu;
				857
				858	if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
				859	pv_lock_ops.vcpu_is_preempted =
				860	PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
				861	}
				862	}
				863
				864	#endif /* CONFIG_PARAVIRT_SPINLOCKS */