Blame - arch/s390/kvm/kvm-s390.c - hafnium/third_party/linux.git

blob: ac5da6b0b862a3d725297890b7feb6cae6d06788 [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* hosting IBM Z kernel virtual machines (s390x)
				4	*
				5	* Copyright IBM Corp. 2008, 2018
				6	*
				7	* Author(s): Carsten Otte <cotte@de.ibm.com>
				8	* Christian Borntraeger <borntraeger@de.ibm.com>
				9	* Heiko Carstens <heiko.carstens@de.ibm.com>
				10	* Christian Ehrhardt <ehrhardt@de.ibm.com>
				11	* Jason J. Herne <jjherne@us.ibm.com>
				12	*/
				13
				14	#include <linux/compiler.h>
				15	#include <linux/err.h>
				16	#include <linux/fs.h>
				17	#include <linux/hrtimer.h>
				18	#include <linux/init.h>
				19	#include <linux/kvm.h>
				20	#include <linux/kvm_host.h>
				21	#include <linux/mman.h>
				22	#include <linux/module.h>
				23	#include <linux/moduleparam.h>
				24	#include <linux/random.h>
				25	#include <linux/slab.h>
				26	#include <linux/timer.h>
				27	#include <linux/vmalloc.h>
				28	#include <linux/bitmap.h>
				29	#include <linux/sched/signal.h>
				30	#include <linux/string.h>
				31
				32	#include <asm/asm-offsets.h>
				33	#include <asm/lowcore.h>
				34	#include <asm/stp.h>
				35	#include <asm/pgtable.h>
				36	#include <asm/gmap.h>
				37	#include <asm/nmi.h>
				38	#include <asm/switch_to.h>
				39	#include <asm/isc.h>
				40	#include <asm/sclp.h>
				41	#include <asm/cpacf.h>
				42	#include <asm/timex.h>
				43	#include "kvm-s390.h"
				44	#include "gaccess.h"
				45
				46	#define KMSG_COMPONENT "kvm-s390"
				47	#undef pr_fmt
				48	#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
				49
				50	#define CREATE_TRACE_POINTS
				51	#include "trace.h"
				52	#include "trace-s390.h"
				53
				54	#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
				55	#define LOCAL_IRQS 32
				56	#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
				57	(KVM_MAX_VCPUS + LOCAL_IRQS))
				58
				59	#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
				60	#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
				61
				62	struct kvm_stats_debugfs_item debugfs_entries[] = {
				63	{ "userspace_handled", VCPU_STAT(exit_userspace) },
				64	{ "exit_null", VCPU_STAT(exit_null) },
				65	{ "exit_validity", VCPU_STAT(exit_validity) },
				66	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
				67	{ "exit_external_request", VCPU_STAT(exit_external_request) },
				68	{ "exit_io_request", VCPU_STAT(exit_io_request) },
				69	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
				70	{ "exit_instruction", VCPU_STAT(exit_instruction) },
				71	{ "exit_pei", VCPU_STAT(exit_pei) },
				72	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
				73	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
				74	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
				75	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
				76	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
				77	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
				78	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
				79	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
				80	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
				81	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
				82	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
				83	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
				84	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
				85	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
				86	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
				87	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
				88	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
				89	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
				90	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
				91	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
				92	{ "deliver_program", VCPU_STAT(deliver_program) },
				93	{ "deliver_io", VCPU_STAT(deliver_io) },
				94	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
				95	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
				96	{ "inject_ckc", VCPU_STAT(inject_ckc) },
				97	{ "inject_cputm", VCPU_STAT(inject_cputm) },
				98	{ "inject_external_call", VCPU_STAT(inject_external_call) },
				99	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
				100	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
				101	{ "inject_io", VM_STAT(inject_io) },
				102	{ "inject_mchk", VCPU_STAT(inject_mchk) },
				103	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
				104	{ "inject_program", VCPU_STAT(inject_program) },
				105	{ "inject_restart", VCPU_STAT(inject_restart) },
				106	{ "inject_service_signal", VM_STAT(inject_service_signal) },
				107	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
				108	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
				109	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
				110	{ "inject_virtio", VM_STAT(inject_virtio) },
				111	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
				112	{ "instruction_gs", VCPU_STAT(instruction_gs) },
				113	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
				114	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
				115	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
				116	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
				117	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
				118	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
				119	{ "instruction_sck", VCPU_STAT(instruction_sck) },
				120	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
				121	{ "instruction_spx", VCPU_STAT(instruction_spx) },
				122	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
				123	{ "instruction_stap", VCPU_STAT(instruction_stap) },
				124	{ "instruction_iske", VCPU_STAT(instruction_iske) },
				125	{ "instruction_ri", VCPU_STAT(instruction_ri) },
				126	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
				127	{ "instruction_sske", VCPU_STAT(instruction_sske) },
				128	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
				129	{ "instruction_essa", VCPU_STAT(instruction_essa) },
				130	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
				131	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
				132	{ "instruction_tb", VCPU_STAT(instruction_tb) },
				133	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
				134	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
				135	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
				136	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
				137	{ "instruction_sie", VCPU_STAT(instruction_sie) },
				138	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
				139	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
				140	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
				141	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
				142	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
				143	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
				144	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
				145	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
				146	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
				147	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
				148	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
				149	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
				150	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
				151	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
				152	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
				153	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
				154	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
				155	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
				156	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
				157	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
				158	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
				159	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
				160	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
				161	{ NULL }
				162	};
				163
				164	struct kvm_s390_tod_clock_ext {
				165	__u8 epoch_idx;
				166	__u64 tod;
				167	__u8 reserved[7];
				168	} __packed;
				169
				170	/* allow nested virtualization in KVM (if enabled by user space) */
				171	static int nested;
				172	module_param(nested, int, S_IRUGO);
				173	MODULE_PARM_DESC(nested, "Nested virtualization support");
				174
				175	/* allow 1m huge page guest backing, if !nested */
				176	static int hpage;
				177	module_param(hpage, int, 0444);
				178	MODULE_PARM_DESC(hpage, "1m huge page backing support");
				179
				180	/*
				181	* For now we handle at most 16 double words as this is what the s390 base
				182	* kernel handles and stores in the prefix page. If we ever need to go beyond
				183	* this, this requires changes to code, but the external uapi can stay.
				184	*/
				185	#define SIZE_INTERNAL 16
				186
				187	/*
				188	* Base feature mask that defines default mask for facilities. Consists of the
				189	* defines in FACILITIES_KVM and the non-hypervisor managed bits.
				190	*/
				191	static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
				192	/*
				193	* Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
				194	* and defines the facilities that can be enabled via a cpu model.
				195	*/
				196	static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
				197
				198	static unsigned long kvm_s390_fac_size(void)
				199	{
				200	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
				201	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
				202	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
				203	sizeof(S390_lowcore.stfle_fac_list));
				204
				205	return SIZE_INTERNAL;
				206	}
				207
				208	/* available cpu features supported by kvm */
				209	static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
				210	/* available subfunctions indicated via query / "test bit" */
				211	static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
				212
				213	static struct gmap_notifier gmap_notifier;
				214	static struct gmap_notifier vsie_gmap_notifier;
				215	debug_info_t *kvm_s390_dbf;
				216
				217	/* Section: not file related */
				218	int kvm_arch_hardware_enable(void)
				219	{
				220	/* every s390 is virtualization enabled ;-) */
				221	return 0;
				222	}
				223
				224	static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
				225	unsigned long end);
				226
				227	static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
				228	{
				229	u8 delta_idx = 0;
				230
				231	/*
				232	* The TOD jumps by delta, we have to compensate this by adding
				233	* -delta to the epoch.
				234	*/
				235	delta = -delta;
				236
				237	/* sign-extension - we're adding to signed values below */
				238	if ((s64)delta < 0)
				239	delta_idx = -1;
				240
				241	scb->epoch += delta;
				242	if (scb->ecd & ECD_MEF) {
				243	scb->epdx += delta_idx;
				244	if (scb->epoch < delta)
				245	scb->epdx += 1;
				246	}
				247	}
				248
				249	/*
				250	* This callback is executed during stop_machine(). All CPUs are therefore
				251	* temporarily stopped. In order not to change guest behavior, we have to
				252	* disable preemption whenever we touch the epoch of kvm and the VCPUs,
				253	* so a CPU won't be stopped while calculating with the epoch.
				254	*/
				255	static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
				256	void *v)
				257	{
				258	struct kvm *kvm;
				259	struct kvm_vcpu *vcpu;
				260	int i;
				261	unsigned long long *delta = v;
				262
				263	list_for_each_entry(kvm, &vm_list, vm_list) {
				264	kvm_for_each_vcpu(i, vcpu, kvm) {
				265	kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
				266	if (i == 0) {
				267	kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				268	kvm->arch.epdx = vcpu->arch.sie_block->epdx;
				269	}
				270	if (vcpu->arch.cputm_enabled)
				271	vcpu->arch.cputm_start += *delta;
				272	if (vcpu->arch.vsie_block)
				273	kvm_clock_sync_scb(vcpu->arch.vsie_block,
				274	*delta);
				275	}
				276	}
				277	return NOTIFY_OK;
				278	}
				279
				280	static struct notifier_block kvm_clock_notifier = {
				281	.notifier_call = kvm_clock_sync,
				282	};
				283
				284	int kvm_arch_hardware_setup(void)
				285	{
				286	gmap_notifier.notifier_call = kvm_gmap_notifier;
				287	gmap_register_pte_notifier(&gmap_notifier);
				288	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
				289	gmap_register_pte_notifier(&vsie_gmap_notifier);
				290	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				291	&kvm_clock_notifier);
				292	return 0;
				293	}
				294
				295	void kvm_arch_hardware_unsetup(void)
				296	{
				297	gmap_unregister_pte_notifier(&gmap_notifier);
				298	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
				299	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
				300	&kvm_clock_notifier);
				301	}
				302
				303	static void allow_cpu_feat(unsigned long nr)
				304	{
				305	set_bit_inv(nr, kvm_s390_available_cpu_feat);
				306	}
				307
				308	static inline int plo_test_bit(unsigned char nr)
				309	{
				310	register unsigned long r0 asm("0") = (unsigned long) nr \| 0x100;
				311	int cc;
				312
				313	asm volatile(
				314	/* Parameter registers are ignored for "test bit" */
				315	" plo 0,0,0,0(0)\n"
				316	" ipm %0\n"
				317	" srl %0,28\n"
				318	: "=d" (cc)
				319	: "d" (r0)
				320	: "cc");
				321	return cc == 0;
				322	}
				323
				324	static void kvm_s390_cpu_feat_init(void)
				325	{
				326	int i;
				327
				328	for (i = 0; i < 256; ++i) {
				329	if (plo_test_bit(i))
				330	kvm_s390_available_subfunc.plo[i >> 3] \|= 0x80 >> (i & 7);
				331	}
				332
				333	if (test_facility(28)) /* TOD-clock steering */
				334	ptff(kvm_s390_available_subfunc.ptff,
				335	sizeof(kvm_s390_available_subfunc.ptff),
				336	PTFF_QAF);
				337
				338	if (test_facility(17)) { /* MSA */
				339	__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
				340	kvm_s390_available_subfunc.kmac);
				341	__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
				342	kvm_s390_available_subfunc.kmc);
				343	__cpacf_query(CPACF_KM, (cpacf_mask_t *)
				344	kvm_s390_available_subfunc.km);
				345	__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
				346	kvm_s390_available_subfunc.kimd);
				347	__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
				348	kvm_s390_available_subfunc.klmd);
				349	}
				350	if (test_facility(76)) /* MSA3 */
				351	__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
				352	kvm_s390_available_subfunc.pckmo);
				353	if (test_facility(77)) { /* MSA4 */
				354	__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
				355	kvm_s390_available_subfunc.kmctr);
				356	__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
				357	kvm_s390_available_subfunc.kmf);
				358	__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
				359	kvm_s390_available_subfunc.kmo);
				360	__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
				361	kvm_s390_available_subfunc.pcc);
				362	}
				363	if (test_facility(57)) /* MSA5 */
				364	__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
				365	kvm_s390_available_subfunc.ppno);
				366
				367	if (test_facility(146)) /* MSA8 */
				368	__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
				369	kvm_s390_available_subfunc.kma);
				370
				371	if (MACHINE_HAS_ESOP)
				372	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
				373	/*
				374	* We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
				375	* 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
				376	*/
				377	if (!sclp.has_sief2 \|\| !MACHINE_HAS_ESOP \|\| !sclp.has_64bscao \|\|
				378	!test_facility(3) \|\| !nested)
				379	return;
				380	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
				381	if (sclp.has_64bscao)
				382	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
				383	if (sclp.has_siif)
				384	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
				385	if (sclp.has_gpere)
				386	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
				387	if (sclp.has_gsls)
				388	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
				389	if (sclp.has_ib)
				390	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
				391	if (sclp.has_cei)
				392	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
				393	if (sclp.has_ibs)
				394	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
				395	if (sclp.has_kss)
				396	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
				397	/*
				398	* KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
				399	* all skey handling functions read/set the skey from the PGSTE
				400	* instead of the real storage key.
				401	*
				402	* KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
				403	* pages being detected as preserved although they are resident.
				404	*
				405	* KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
				406	* have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
				407	*
				408	* For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
				409	* KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
				410	* correctly shadowed. We can do that for the PGSTE but not for PTE.I.
				411	*
				412	* KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
				413	* cannot easily shadow the SCA because of the ipte lock.
				414	*/
				415	}
				416
				417	int kvm_arch_init(void *opaque)
				418	{
				419	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
				420	if (!kvm_s390_dbf)
				421	return -ENOMEM;
				422
				423	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
				424	debug_unregister(kvm_s390_dbf);
				425	return -ENOMEM;
				426	}
				427
				428	kvm_s390_cpu_feat_init();
				429
				430	/* Register floating interrupt controller interface. */
				431	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
				432	}
				433
				434	void kvm_arch_exit(void)
				435	{
				436	debug_unregister(kvm_s390_dbf);
				437	}
				438
				439	/* Section: device related */
				440	long kvm_arch_dev_ioctl(struct file *filp,
				441	unsigned int ioctl, unsigned long arg)
				442	{
				443	if (ioctl == KVM_S390_ENABLE_SIE)
				444	return s390_enable_sie();
				445	return -EINVAL;
				446	}
				447
				448	int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
				449	{
				450	int r;
				451
				452	switch (ext) {
				453	case KVM_CAP_S390_PSW:
				454	case KVM_CAP_S390_GMAP:
				455	case KVM_CAP_SYNC_MMU:
				456	#ifdef CONFIG_KVM_S390_UCONTROL
				457	case KVM_CAP_S390_UCONTROL:
				458	#endif
				459	case KVM_CAP_ASYNC_PF:
				460	case KVM_CAP_SYNC_REGS:
				461	case KVM_CAP_ONE_REG:
				462	case KVM_CAP_ENABLE_CAP:
				463	case KVM_CAP_S390_CSS_SUPPORT:
				464	case KVM_CAP_IOEVENTFD:
				465	case KVM_CAP_DEVICE_CTRL:
				466	case KVM_CAP_ENABLE_CAP_VM:
				467	case KVM_CAP_S390_IRQCHIP:
				468	case KVM_CAP_VM_ATTRIBUTES:
				469	case KVM_CAP_MP_STATE:
				470	case KVM_CAP_IMMEDIATE_EXIT:
				471	case KVM_CAP_S390_INJECT_IRQ:
				472	case KVM_CAP_S390_USER_SIGP:
				473	case KVM_CAP_S390_USER_STSI:
				474	case KVM_CAP_S390_SKEYS:
				475	case KVM_CAP_S390_IRQ_STATE:
				476	case KVM_CAP_S390_USER_INSTR0:
				477	case KVM_CAP_S390_CMMA_MIGRATION:
				478	case KVM_CAP_S390_AIS:
				479	case KVM_CAP_S390_AIS_MIGRATION:
				480	r = 1;
				481	break;
				482	case KVM_CAP_S390_HPAGE_1M:
				483	r = 0;
				484	if (hpage && !kvm_is_ucontrol(kvm))
				485	r = 1;
				486	break;
				487	case KVM_CAP_S390_MEM_OP:
				488	r = MEM_OP_MAX_SIZE;
				489	break;
				490	case KVM_CAP_NR_VCPUS:
				491	case KVM_CAP_MAX_VCPUS:
				492	r = KVM_S390_BSCA_CPU_SLOTS;
				493	if (!kvm_s390_use_sca_entries())
				494	r = KVM_MAX_VCPUS;
				495	else if (sclp.has_esca && sclp.has_64bscao)
				496	r = KVM_S390_ESCA_CPU_SLOTS;
				497	break;
				498	case KVM_CAP_NR_MEMSLOTS:
				499	r = KVM_USER_MEM_SLOTS;
				500	break;
				501	case KVM_CAP_S390_COW:
				502	r = MACHINE_HAS_ESOP;
				503	break;
				504	case KVM_CAP_S390_VECTOR_REGISTERS:
				505	r = MACHINE_HAS_VX;
				506	break;
				507	case KVM_CAP_S390_RI:
				508	r = test_facility(64);
				509	break;
				510	case KVM_CAP_S390_GS:
				511	r = test_facility(133);
				512	break;
				513	case KVM_CAP_S390_BPB:
				514	r = test_facility(82);
				515	break;
				516	default:
				517	r = 0;
				518	}
				519	return r;
				520	}
				521
				522	static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				523	struct kvm_memory_slot *memslot)
				524	{
				525	int i;
				526	gfn_t cur_gfn, last_gfn;
				527	unsigned long gaddr, vmaddr;
				528	struct gmap *gmap = kvm->arch.gmap;
				529	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
				530
				531	/* Loop over all guest segments */
				532	cur_gfn = memslot->base_gfn;
				533	last_gfn = memslot->base_gfn + memslot->npages;
				534	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
				535	gaddr = gfn_to_gpa(cur_gfn);
				536	vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
				537	if (kvm_is_error_hva(vmaddr))
				538	continue;
				539
				540	bitmap_zero(bitmap, _PAGE_ENTRIES);
				541	gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
				542	for (i = 0; i < _PAGE_ENTRIES; i++) {
				543	if (test_bit(i, bitmap))
				544	mark_page_dirty(kvm, cur_gfn + i);
				545	}
				546
				547	if (fatal_signal_pending(current))
				548	return;
				549	cond_resched();
				550	}
				551	}
				552
				553	/* Section: vm related */
				554	static void sca_del_vcpu(struct kvm_vcpu *vcpu);
				555
				556	/*
				557	* Get (and clear) the dirty memory log for a memory slot.
				558	*/
				559	int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
				560	struct kvm_dirty_log *log)
				561	{
				562	int r;
				563	unsigned long n;
				564	struct kvm_memslots *slots;
				565	struct kvm_memory_slot *memslot;
				566	int is_dirty = 0;
				567
				568	if (kvm_is_ucontrol(kvm))
				569	return -EINVAL;
				570
				571	mutex_lock(&kvm->slots_lock);
				572
				573	r = -EINVAL;
				574	if (log->slot >= KVM_USER_MEM_SLOTS)
				575	goto out;
				576
				577	slots = kvm_memslots(kvm);
				578	memslot = id_to_memslot(slots, log->slot);
				579	r = -ENOENT;
				580	if (!memslot->dirty_bitmap)
				581	goto out;
				582
				583	kvm_s390_sync_dirty_log(kvm, memslot);
				584	r = kvm_get_dirty_log(kvm, log, &is_dirty);
				585	if (r)
				586	goto out;
				587
				588	/* Clear the dirty log */
				589	if (is_dirty) {
				590	n = kvm_dirty_bitmap_bytes(memslot);
				591	memset(memslot->dirty_bitmap, 0, n);
				592	}
				593	r = 0;
				594	out:
				595	mutex_unlock(&kvm->slots_lock);
				596	return r;
				597	}
				598
				599	static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
				600	{
				601	unsigned int i;
				602	struct kvm_vcpu *vcpu;
				603
				604	kvm_for_each_vcpu(i, vcpu, kvm) {
				605	kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
				606	}
				607	}
				608
				609	static int kvm_vm_ioctl_enable_cap(struct kvm kvm, struct kvm_enable_cap cap)
				610	{
				611	int r;
				612
				613	if (cap->flags)
				614	return -EINVAL;
				615
				616	switch (cap->cap) {
				617	case KVM_CAP_S390_IRQCHIP:
				618	VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
				619	kvm->arch.use_irqchip = 1;
				620	r = 0;
				621	break;
				622	case KVM_CAP_S390_USER_SIGP:
				623	VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
				624	kvm->arch.user_sigp = 1;
				625	r = 0;
				626	break;
				627	case KVM_CAP_S390_VECTOR_REGISTERS:
				628	mutex_lock(&kvm->lock);
				629	if (kvm->created_vcpus) {
				630	r = -EBUSY;
				631	} else if (MACHINE_HAS_VX) {
				632	set_kvm_facility(kvm->arch.model.fac_mask, 129);
				633	set_kvm_facility(kvm->arch.model.fac_list, 129);
				634	if (test_facility(134)) {
				635	set_kvm_facility(kvm->arch.model.fac_mask, 134);
				636	set_kvm_facility(kvm->arch.model.fac_list, 134);
				637	}
				638	if (test_facility(135)) {
				639	set_kvm_facility(kvm->arch.model.fac_mask, 135);
				640	set_kvm_facility(kvm->arch.model.fac_list, 135);
				641	}
				642	r = 0;
				643	} else
				644	r = -EINVAL;
				645	mutex_unlock(&kvm->lock);
				646	VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
				647	r ? "(not available)" : "(success)");
				648	break;
				649	case KVM_CAP_S390_RI:
				650	r = -EINVAL;
				651	mutex_lock(&kvm->lock);
				652	if (kvm->created_vcpus) {
				653	r = -EBUSY;
				654	} else if (test_facility(64)) {
				655	set_kvm_facility(kvm->arch.model.fac_mask, 64);
				656	set_kvm_facility(kvm->arch.model.fac_list, 64);
				657	r = 0;
				658	}
				659	mutex_unlock(&kvm->lock);
				660	VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
				661	r ? "(not available)" : "(success)");
				662	break;
				663	case KVM_CAP_S390_AIS:
				664	mutex_lock(&kvm->lock);
				665	if (kvm->created_vcpus) {
				666	r = -EBUSY;
				667	} else {
				668	set_kvm_facility(kvm->arch.model.fac_mask, 72);
				669	set_kvm_facility(kvm->arch.model.fac_list, 72);
				670	r = 0;
				671	}
				672	mutex_unlock(&kvm->lock);
				673	VM_EVENT(kvm, 3, "ENABLE: AIS %s",
				674	r ? "(not available)" : "(success)");
				675	break;
				676	case KVM_CAP_S390_GS:
				677	r = -EINVAL;
				678	mutex_lock(&kvm->lock);
				679	if (kvm->created_vcpus) {
				680	r = -EBUSY;
				681	} else if (test_facility(133)) {
				682	set_kvm_facility(kvm->arch.model.fac_mask, 133);
				683	set_kvm_facility(kvm->arch.model.fac_list, 133);
				684	r = 0;
				685	}
				686	mutex_unlock(&kvm->lock);
				687	VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
				688	r ? "(not available)" : "(success)");
				689	break;
				690	case KVM_CAP_S390_HPAGE_1M:
				691	mutex_lock(&kvm->lock);
				692	if (kvm->created_vcpus)
				693	r = -EBUSY;
				694	else if (!hpage \|\| kvm->arch.use_cmma \|\| kvm_is_ucontrol(kvm))
				695	r = -EINVAL;
				696	else {
				697	r = 0;
				698	down_write(&kvm->mm->mmap_sem);
				699	kvm->mm->context.allow_gmap_hpage_1m = 1;
				700	up_write(&kvm->mm->mmap_sem);
				701	/*
				702	* We might have to create fake 4k page
				703	* tables. To avoid that the hardware works on
				704	* stale PGSTEs, we emulate these instructions.
				705	*/
				706	kvm->arch.use_skf = 0;
				707	kvm->arch.use_pfmfi = 0;
				708	}
				709	mutex_unlock(&kvm->lock);
				710	VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
				711	r ? "(not available)" : "(success)");
				712	break;
				713	case KVM_CAP_S390_USER_STSI:
				714	VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
				715	kvm->arch.user_stsi = 1;
				716	r = 0;
				717	break;
				718	case KVM_CAP_S390_USER_INSTR0:
				719	VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
				720	kvm->arch.user_instr0 = 1;
				721	icpt_operexc_on_all_vcpus(kvm);
				722	r = 0;
				723	break;
				724	default:
				725	r = -EINVAL;
				726	break;
				727	}
				728	return r;
				729	}
				730
				731	static int kvm_s390_get_mem_control(struct kvm kvm, struct kvm_device_attr attr)
				732	{
				733	int ret;
				734
				735	switch (attr->attr) {
				736	case KVM_S390_VM_MEM_LIMIT_SIZE:
				737	ret = 0;
				738	VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
				739	kvm->arch.mem_limit);
				740	if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
				741	ret = -EFAULT;
				742	break;
				743	default:
				744	ret = -ENXIO;
				745	break;
				746	}
				747	return ret;
				748	}
				749
				750	static int kvm_s390_set_mem_control(struct kvm kvm, struct kvm_device_attr attr)
				751	{
				752	int ret;
				753	unsigned int idx;
				754	switch (attr->attr) {
				755	case KVM_S390_VM_MEM_ENABLE_CMMA:
				756	ret = -ENXIO;
				757	if (!sclp.has_cmma)
				758	break;
				759
				760	VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
				761	mutex_lock(&kvm->lock);
				762	if (kvm->created_vcpus)
				763	ret = -EBUSY;
				764	else if (kvm->mm->context.allow_gmap_hpage_1m)
				765	ret = -EINVAL;
				766	else {
				767	kvm->arch.use_cmma = 1;
				768	/* Not compatible with cmma. */
				769	kvm->arch.use_pfmfi = 0;
				770	ret = 0;
				771	}
				772	mutex_unlock(&kvm->lock);
				773	break;
				774	case KVM_S390_VM_MEM_CLR_CMMA:
				775	ret = -ENXIO;
				776	if (!sclp.has_cmma)
				777	break;
				778	ret = -EINVAL;
				779	if (!kvm->arch.use_cmma)
				780	break;
				781
				782	VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
				783	mutex_lock(&kvm->lock);
				784	idx = srcu_read_lock(&kvm->srcu);
				785	s390_reset_cmma(kvm->arch.gmap->mm);
				786	srcu_read_unlock(&kvm->srcu, idx);
				787	mutex_unlock(&kvm->lock);
				788	ret = 0;
				789	break;
				790	case KVM_S390_VM_MEM_LIMIT_SIZE: {
				791	unsigned long new_limit;
				792
				793	if (kvm_is_ucontrol(kvm))
				794	return -EINVAL;
				795
				796	if (get_user(new_limit, (u64 __user *)attr->addr))
				797	return -EFAULT;
				798
				799	if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
				800	new_limit > kvm->arch.mem_limit)
				801	return -E2BIG;
				802
				803	if (!new_limit)
				804	return -EINVAL;
				805
				806	/* gmap_create takes last usable address */
				807	if (new_limit != KVM_S390_NO_MEM_LIMIT)
				808	new_limit -= 1;
				809
				810	ret = -EBUSY;
				811	mutex_lock(&kvm->lock);
				812	if (!kvm->created_vcpus) {
				813	/* gmap_create will round the limit up */
				814	struct gmap *new = gmap_create(current->mm, new_limit);
				815
				816	if (!new) {
				817	ret = -ENOMEM;
				818	} else {
				819	gmap_remove(kvm->arch.gmap);
				820	new->private = kvm;
				821	kvm->arch.gmap = new;
				822	ret = 0;
				823	}
				824	}
				825	mutex_unlock(&kvm->lock);
				826	VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
				827	VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
				828	(void *) kvm->arch.gmap->asce);
				829	break;
				830	}
				831	default:
				832	ret = -ENXIO;
				833	break;
				834	}
				835	return ret;
				836	}
				837
				838	static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
				839
				840	void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
				841	{
				842	struct kvm_vcpu *vcpu;
				843	int i;
				844
				845	kvm_s390_vcpu_block_all(kvm);
				846
				847	kvm_for_each_vcpu(i, vcpu, kvm)
				848	kvm_s390_vcpu_crypto_setup(vcpu);
				849
				850	kvm_s390_vcpu_unblock_all(kvm);
				851	}
				852
				853	static int kvm_s390_vm_set_crypto(struct kvm kvm, struct kvm_device_attr attr)
				854	{
				855	if (!test_kvm_facility(kvm, 76))
				856	return -EINVAL;
				857
				858	mutex_lock(&kvm->lock);
				859	switch (attr->attr) {
				860	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
				861	get_random_bytes(
				862	kvm->arch.crypto.crycb->aes_wrapping_key_mask,
				863	sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
				864	kvm->arch.crypto.aes_kw = 1;
				865	VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
				866	break;
				867	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
				868	get_random_bytes(
				869	kvm->arch.crypto.crycb->dea_wrapping_key_mask,
				870	sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
				871	kvm->arch.crypto.dea_kw = 1;
				872	VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
				873	break;
				874	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
				875	kvm->arch.crypto.aes_kw = 0;
				876	memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
				877	sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
				878	VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
				879	break;
				880	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
				881	kvm->arch.crypto.dea_kw = 0;
				882	memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
				883	sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
				884	VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
				885	break;
				886	default:
				887	mutex_unlock(&kvm->lock);
				888	return -ENXIO;
				889	}
				890
				891	kvm_s390_vcpu_crypto_reset_all(kvm);
				892	mutex_unlock(&kvm->lock);
				893	return 0;
				894	}
				895
				896	static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
				897	{
				898	int cx;
				899	struct kvm_vcpu *vcpu;
				900
				901	kvm_for_each_vcpu(cx, vcpu, kvm)
				902	kvm_s390_sync_request(req, vcpu);
				903	}
				904
				905	/*
				906	* Must be called with kvm->srcu held to avoid races on memslots, and with
				907	* kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
				908	*/
				909	static int kvm_s390_vm_start_migration(struct kvm *kvm)
				910	{
				911	struct kvm_memory_slot *ms;
				912	struct kvm_memslots *slots;
				913	unsigned long ram_pages = 0;
				914	int slotnr;
				915
				916	/* migration mode already enabled */
				917	if (kvm->arch.migration_mode)
				918	return 0;
				919	slots = kvm_memslots(kvm);
				920	if (!slots \|\| !slots->used_slots)
				921	return -EINVAL;
				922
				923	if (!kvm->arch.use_cmma) {
				924	kvm->arch.migration_mode = 1;
				925	return 0;
				926	}
				927	/* mark all the pages in active slots as dirty */
				928	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
				929	ms = slots->memslots + slotnr;
				930	/*
				931	* The second half of the bitmap is only used on x86,
				932	* and would be wasted otherwise, so we put it to good
				933	* use here to keep track of the state of the storage
				934	* attributes.
				935	*/
				936	memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
				937	ram_pages += ms->npages;
				938	}
				939	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
				940	kvm->arch.migration_mode = 1;
				941	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
				942	return 0;
				943	}
				944
				945	/*
				946	* Must be called with kvm->slots_lock to avoid races with ourselves and
				947	* kvm_s390_vm_start_migration.
				948	*/
				949	static int kvm_s390_vm_stop_migration(struct kvm *kvm)
				950	{
				951	/* migration mode already disabled */
				952	if (!kvm->arch.migration_mode)
				953	return 0;
				954	kvm->arch.migration_mode = 0;
				955	if (kvm->arch.use_cmma)
				956	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
				957	return 0;
				958	}
				959
				960	static int kvm_s390_vm_set_migration(struct kvm *kvm,
				961	struct kvm_device_attr *attr)
				962	{
				963	int res = -ENXIO;
				964
				965	mutex_lock(&kvm->slots_lock);
				966	switch (attr->attr) {
				967	case KVM_S390_VM_MIGRATION_START:
				968	res = kvm_s390_vm_start_migration(kvm);
				969	break;
				970	case KVM_S390_VM_MIGRATION_STOP:
				971	res = kvm_s390_vm_stop_migration(kvm);
				972	break;
				973	default:
				974	break;
				975	}
				976	mutex_unlock(&kvm->slots_lock);
				977
				978	return res;
				979	}
				980
				981	static int kvm_s390_vm_get_migration(struct kvm *kvm,
				982	struct kvm_device_attr *attr)
				983	{
				984	u64 mig = kvm->arch.migration_mode;
				985
				986	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
				987	return -ENXIO;
				988
				989	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
				990	return -EFAULT;
				991	return 0;
				992	}
				993
				994	static int kvm_s390_set_tod_ext(struct kvm kvm, struct kvm_device_attr attr)
				995	{
				996	struct kvm_s390_vm_tod_clock gtod;
				997
				998	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
				999	return -EFAULT;
				1000
				1001	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
				1002	return -EINVAL;
				1003	kvm_s390_set_tod_clock(kvm, &gtod);
				1004
				1005	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
				1006	gtod.epoch_idx, gtod.tod);
				1007
				1008	return 0;
				1009	}
				1010
				1011	static int kvm_s390_set_tod_high(struct kvm kvm, struct kvm_device_attr attr)
				1012	{
				1013	u8 gtod_high;
				1014
				1015	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
				1016	sizeof(gtod_high)))
				1017	return -EFAULT;
				1018
				1019	if (gtod_high != 0)
				1020	return -EINVAL;
				1021	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
				1022
				1023	return 0;
				1024	}
				1025
				1026	static int kvm_s390_set_tod_low(struct kvm kvm, struct kvm_device_attr attr)
				1027	{
				1028	struct kvm_s390_vm_tod_clock gtod = { 0 };
				1029
				1030	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
				1031	sizeof(gtod.tod)))
				1032	return -EFAULT;
				1033
				1034	kvm_s390_set_tod_clock(kvm, &gtod);
				1035	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
				1036	return 0;
				1037	}
				1038
				1039	static int kvm_s390_set_tod(struct kvm kvm, struct kvm_device_attr attr)
				1040	{
				1041	int ret;
				1042
				1043	if (attr->flags)
				1044	return -EINVAL;
				1045
				1046	switch (attr->attr) {
				1047	case KVM_S390_VM_TOD_EXT:
				1048	ret = kvm_s390_set_tod_ext(kvm, attr);
				1049	break;
				1050	case KVM_S390_VM_TOD_HIGH:
				1051	ret = kvm_s390_set_tod_high(kvm, attr);
				1052	break;
				1053	case KVM_S390_VM_TOD_LOW:
				1054	ret = kvm_s390_set_tod_low(kvm, attr);
				1055	break;
				1056	default:
				1057	ret = -ENXIO;
				1058	break;
				1059	}
				1060	return ret;
				1061	}
				1062
				1063	static void kvm_s390_get_tod_clock(struct kvm *kvm,
				1064	struct kvm_s390_vm_tod_clock *gtod)
				1065	{
				1066	struct kvm_s390_tod_clock_ext htod;
				1067
				1068	preempt_disable();
				1069
				1070	get_tod_clock_ext((char *)&htod);
				1071
				1072	gtod->tod = htod.tod + kvm->arch.epoch;
				1073	gtod->epoch_idx = 0;
				1074	if (test_kvm_facility(kvm, 139)) {
				1075	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
				1076	if (gtod->tod < htod.tod)
				1077	gtod->epoch_idx += 1;
				1078	}
				1079
				1080	preempt_enable();
				1081	}
				1082
				1083	static int kvm_s390_get_tod_ext(struct kvm kvm, struct kvm_device_attr attr)
				1084	{
				1085	struct kvm_s390_vm_tod_clock gtod;
				1086
				1087	memset(&gtod, 0, sizeof(gtod));
				1088	kvm_s390_get_tod_clock(kvm, &gtod);
				1089	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
				1090	return -EFAULT;
				1091
				1092	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
				1093	gtod.epoch_idx, gtod.tod);
				1094	return 0;
				1095	}
				1096
				1097	static int kvm_s390_get_tod_high(struct kvm kvm, struct kvm_device_attr attr)
				1098	{
				1099	u8 gtod_high = 0;
				1100
				1101	if (copy_to_user((void __user *)attr->addr, &gtod_high,
				1102	sizeof(gtod_high)))
				1103	return -EFAULT;
				1104	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
				1105
				1106	return 0;
				1107	}
				1108
				1109	static int kvm_s390_get_tod_low(struct kvm kvm, struct kvm_device_attr attr)
				1110	{
				1111	u64 gtod;
				1112
				1113	gtod = kvm_s390_get_tod_clock_fast(kvm);
				1114	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
				1115	return -EFAULT;
				1116	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
				1117
				1118	return 0;
				1119	}
				1120
				1121	static int kvm_s390_get_tod(struct kvm kvm, struct kvm_device_attr attr)
				1122	{
				1123	int ret;
				1124
				1125	if (attr->flags)
				1126	return -EINVAL;
				1127
				1128	switch (attr->attr) {
				1129	case KVM_S390_VM_TOD_EXT:
				1130	ret = kvm_s390_get_tod_ext(kvm, attr);
				1131	break;
				1132	case KVM_S390_VM_TOD_HIGH:
				1133	ret = kvm_s390_get_tod_high(kvm, attr);
				1134	break;
				1135	case KVM_S390_VM_TOD_LOW:
				1136	ret = kvm_s390_get_tod_low(kvm, attr);
				1137	break;
				1138	default:
				1139	ret = -ENXIO;
				1140	break;
				1141	}
				1142	return ret;
				1143	}
				1144
				1145	static int kvm_s390_set_processor(struct kvm kvm, struct kvm_device_attr attr)
				1146	{
				1147	struct kvm_s390_vm_cpu_processor *proc;
				1148	u16 lowest_ibc, unblocked_ibc;
				1149	int ret = 0;
				1150
				1151	mutex_lock(&kvm->lock);
				1152	if (kvm->created_vcpus) {
				1153	ret = -EBUSY;
				1154	goto out;
				1155	}
				1156	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
				1157	if (!proc) {
				1158	ret = -ENOMEM;
				1159	goto out;
				1160	}
				1161	if (!copy_from_user(proc, (void __user *)attr->addr,
				1162	sizeof(*proc))) {
				1163	kvm->arch.model.cpuid = proc->cpuid;
				1164	lowest_ibc = sclp.ibc >> 16 & 0xfff;
				1165	unblocked_ibc = sclp.ibc & 0xfff;
				1166	if (lowest_ibc && proc->ibc) {
				1167	if (proc->ibc > unblocked_ibc)
				1168	kvm->arch.model.ibc = unblocked_ibc;
				1169	else if (proc->ibc < lowest_ibc)
				1170	kvm->arch.model.ibc = lowest_ibc;
				1171	else
				1172	kvm->arch.model.ibc = proc->ibc;
				1173	}
				1174	memcpy(kvm->arch.model.fac_list, proc->fac_list,
				1175	S390_ARCH_FAC_LIST_SIZE_BYTE);
				1176	VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
				1177	kvm->arch.model.ibc,
				1178	kvm->arch.model.cpuid);
				1179	VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
				1180	kvm->arch.model.fac_list[0],
				1181	kvm->arch.model.fac_list[1],
				1182	kvm->arch.model.fac_list[2]);
				1183	} else
				1184	ret = -EFAULT;
				1185	kfree(proc);
				1186	out:
				1187	mutex_unlock(&kvm->lock);
				1188	return ret;
				1189	}
				1190
				1191	static int kvm_s390_set_processor_feat(struct kvm *kvm,
				1192	struct kvm_device_attr *attr)
				1193	{
				1194	struct kvm_s390_vm_cpu_feat data;
				1195
				1196	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
				1197	return -EFAULT;
				1198	if (!bitmap_subset((unsigned long *) data.feat,
				1199	kvm_s390_available_cpu_feat,
				1200	KVM_S390_VM_CPU_FEAT_NR_BITS))
				1201	return -EINVAL;
				1202
				1203	mutex_lock(&kvm->lock);
				1204	if (kvm->created_vcpus) {
				1205	mutex_unlock(&kvm->lock);
				1206	return -EBUSY;
				1207	}
				1208	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
				1209	KVM_S390_VM_CPU_FEAT_NR_BITS);
				1210	mutex_unlock(&kvm->lock);
				1211	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
				1212	data.feat[0],
				1213	data.feat[1],
				1214	data.feat[2]);
				1215	return 0;
				1216	}
				1217
				1218	static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
				1219	struct kvm_device_attr *attr)
				1220	{
				1221	/*
				1222	* Once supported by kernel + hw, we have to store the subfunctions
				1223	* in kvm->arch and remember that user space configured them.
				1224	*/
				1225	return -ENXIO;
				1226	}
				1227
				1228	static int kvm_s390_set_cpu_model(struct kvm kvm, struct kvm_device_attr attr)
				1229	{
				1230	int ret = -ENXIO;
				1231
				1232	switch (attr->attr) {
				1233	case KVM_S390_VM_CPU_PROCESSOR:
				1234	ret = kvm_s390_set_processor(kvm, attr);
				1235	break;
				1236	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
				1237	ret = kvm_s390_set_processor_feat(kvm, attr);
				1238	break;
				1239	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
				1240	ret = kvm_s390_set_processor_subfunc(kvm, attr);
				1241	break;
				1242	}
				1243	return ret;
				1244	}
				1245
				1246	static int kvm_s390_get_processor(struct kvm kvm, struct kvm_device_attr attr)
				1247	{
				1248	struct kvm_s390_vm_cpu_processor *proc;
				1249	int ret = 0;
				1250
				1251	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
				1252	if (!proc) {
				1253	ret = -ENOMEM;
				1254	goto out;
				1255	}
				1256	proc->cpuid = kvm->arch.model.cpuid;
				1257	proc->ibc = kvm->arch.model.ibc;
				1258	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
				1259	S390_ARCH_FAC_LIST_SIZE_BYTE);
				1260	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
				1261	kvm->arch.model.ibc,
				1262	kvm->arch.model.cpuid);
				1263	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
				1264	kvm->arch.model.fac_list[0],
				1265	kvm->arch.model.fac_list[1],
				1266	kvm->arch.model.fac_list[2]);
				1267	if (copy_to_user((void __user )attr->addr, proc, sizeof(proc)))
				1268	ret = -EFAULT;
				1269	kfree(proc);
				1270	out:
				1271	return ret;
				1272	}
				1273
				1274	static int kvm_s390_get_machine(struct kvm kvm, struct kvm_device_attr attr)
				1275	{
				1276	struct kvm_s390_vm_cpu_machine *mach;
				1277	int ret = 0;
				1278
				1279	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
				1280	if (!mach) {
				1281	ret = -ENOMEM;
				1282	goto out;
				1283	}
				1284	get_cpu_id((struct cpuid *) &mach->cpuid);
				1285	mach->ibc = sclp.ibc;
				1286	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
				1287	S390_ARCH_FAC_LIST_SIZE_BYTE);
				1288	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
				1289	sizeof(S390_lowcore.stfle_fac_list));
				1290	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
				1291	kvm->arch.model.ibc,
				1292	kvm->arch.model.cpuid);
				1293	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
				1294	mach->fac_mask[0],
				1295	mach->fac_mask[1],
				1296	mach->fac_mask[2]);
				1297	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
				1298	mach->fac_list[0],
				1299	mach->fac_list[1],
				1300	mach->fac_list[2]);
				1301	if (copy_to_user((void __user )attr->addr, mach, sizeof(mach)))
				1302	ret = -EFAULT;
				1303	kfree(mach);
				1304	out:
				1305	return ret;
				1306	}
				1307
				1308	static int kvm_s390_get_processor_feat(struct kvm *kvm,
				1309	struct kvm_device_attr *attr)
				1310	{
				1311	struct kvm_s390_vm_cpu_feat data;
				1312
				1313	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
				1314	KVM_S390_VM_CPU_FEAT_NR_BITS);
				1315	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
				1316	return -EFAULT;
				1317	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
				1318	data.feat[0],
				1319	data.feat[1],
				1320	data.feat[2]);
				1321	return 0;
				1322	}
				1323
				1324	static int kvm_s390_get_machine_feat(struct kvm *kvm,
				1325	struct kvm_device_attr *attr)
				1326	{
				1327	struct kvm_s390_vm_cpu_feat data;
				1328
				1329	bitmap_copy((unsigned long *) data.feat,
				1330	kvm_s390_available_cpu_feat,
				1331	KVM_S390_VM_CPU_FEAT_NR_BITS);
				1332	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
				1333	return -EFAULT;
				1334	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
				1335	data.feat[0],
				1336	data.feat[1],
				1337	data.feat[2]);
				1338	return 0;
				1339	}
				1340
				1341	static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
				1342	struct kvm_device_attr *attr)
				1343	{
				1344	/*
				1345	* Once we can actually configure subfunctions (kernel + hw support),
				1346	* we have to check if they were already set by user space, if so copy
				1347	* them from kvm->arch.
				1348	*/
				1349	return -ENXIO;
				1350	}
				1351
				1352	static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
				1353	struct kvm_device_attr *attr)
				1354	{
				1355	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
				1356	sizeof(struct kvm_s390_vm_cpu_subfunc)))
				1357	return -EFAULT;
				1358	return 0;
				1359	}
				1360	static int kvm_s390_get_cpu_model(struct kvm kvm, struct kvm_device_attr attr)
				1361	{
				1362	int ret = -ENXIO;
				1363
				1364	switch (attr->attr) {
				1365	case KVM_S390_VM_CPU_PROCESSOR:
				1366	ret = kvm_s390_get_processor(kvm, attr);
				1367	break;
				1368	case KVM_S390_VM_CPU_MACHINE:
				1369	ret = kvm_s390_get_machine(kvm, attr);
				1370	break;
				1371	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
				1372	ret = kvm_s390_get_processor_feat(kvm, attr);
				1373	break;
				1374	case KVM_S390_VM_CPU_MACHINE_FEAT:
				1375	ret = kvm_s390_get_machine_feat(kvm, attr);
				1376	break;
				1377	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
				1378	ret = kvm_s390_get_processor_subfunc(kvm, attr);
				1379	break;
				1380	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
				1381	ret = kvm_s390_get_machine_subfunc(kvm, attr);
				1382	break;
				1383	}
				1384	return ret;
				1385	}
				1386
				1387	static int kvm_s390_vm_set_attr(struct kvm kvm, struct kvm_device_attr attr)
				1388	{
				1389	int ret;
				1390
				1391	switch (attr->group) {
				1392	case KVM_S390_VM_MEM_CTRL:
				1393	ret = kvm_s390_set_mem_control(kvm, attr);
				1394	break;
				1395	case KVM_S390_VM_TOD:
				1396	ret = kvm_s390_set_tod(kvm, attr);
				1397	break;
				1398	case KVM_S390_VM_CPU_MODEL:
				1399	ret = kvm_s390_set_cpu_model(kvm, attr);
				1400	break;
				1401	case KVM_S390_VM_CRYPTO:
				1402	ret = kvm_s390_vm_set_crypto(kvm, attr);
				1403	break;
				1404	case KVM_S390_VM_MIGRATION:
				1405	ret = kvm_s390_vm_set_migration(kvm, attr);
				1406	break;
				1407	default:
				1408	ret = -ENXIO;
				1409	break;
				1410	}
				1411
				1412	return ret;
				1413	}
				1414
				1415	static int kvm_s390_vm_get_attr(struct kvm kvm, struct kvm_device_attr attr)
				1416	{
				1417	int ret;
				1418
				1419	switch (attr->group) {
				1420	case KVM_S390_VM_MEM_CTRL:
				1421	ret = kvm_s390_get_mem_control(kvm, attr);
				1422	break;
				1423	case KVM_S390_VM_TOD:
				1424	ret = kvm_s390_get_tod(kvm, attr);
				1425	break;
				1426	case KVM_S390_VM_CPU_MODEL:
				1427	ret = kvm_s390_get_cpu_model(kvm, attr);
				1428	break;
				1429	case KVM_S390_VM_MIGRATION:
				1430	ret = kvm_s390_vm_get_migration(kvm, attr);
				1431	break;
				1432	default:
				1433	ret = -ENXIO;
				1434	break;
				1435	}
				1436
				1437	return ret;
				1438	}
				1439
				1440	static int kvm_s390_vm_has_attr(struct kvm kvm, struct kvm_device_attr attr)
				1441	{
				1442	int ret;
				1443
				1444	switch (attr->group) {
				1445	case KVM_S390_VM_MEM_CTRL:
				1446	switch (attr->attr) {
				1447	case KVM_S390_VM_MEM_ENABLE_CMMA:
				1448	case KVM_S390_VM_MEM_CLR_CMMA:
				1449	ret = sclp.has_cmma ? 0 : -ENXIO;
				1450	break;
				1451	case KVM_S390_VM_MEM_LIMIT_SIZE:
				1452	ret = 0;
				1453	break;
				1454	default:
				1455	ret = -ENXIO;
				1456	break;
				1457	}
				1458	break;
				1459	case KVM_S390_VM_TOD:
				1460	switch (attr->attr) {
				1461	case KVM_S390_VM_TOD_LOW:
				1462	case KVM_S390_VM_TOD_HIGH:
				1463	ret = 0;
				1464	break;
				1465	default:
				1466	ret = -ENXIO;
				1467	break;
				1468	}
				1469	break;
				1470	case KVM_S390_VM_CPU_MODEL:
				1471	switch (attr->attr) {
				1472	case KVM_S390_VM_CPU_PROCESSOR:
				1473	case KVM_S390_VM_CPU_MACHINE:
				1474	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
				1475	case KVM_S390_VM_CPU_MACHINE_FEAT:
				1476	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
				1477	ret = 0;
				1478	break;
				1479	/* configuring subfunctions is not supported yet */
				1480	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
				1481	default:
				1482	ret = -ENXIO;
				1483	break;
				1484	}
				1485	break;
				1486	case KVM_S390_VM_CRYPTO:
				1487	switch (attr->attr) {
				1488	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
				1489	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
				1490	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
				1491	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
				1492	ret = 0;
				1493	break;
				1494	default:
				1495	ret = -ENXIO;
				1496	break;
				1497	}
				1498	break;
				1499	case KVM_S390_VM_MIGRATION:
				1500	ret = 0;
				1501	break;
				1502	default:
				1503	ret = -ENXIO;
				1504	break;
				1505	}
				1506
				1507	return ret;
				1508	}
				1509
				1510	static long kvm_s390_get_skeys(struct kvm kvm, struct kvm_s390_skeys args)
				1511	{
				1512	uint8_t *keys;
				1513	uint64_t hva;
				1514	int srcu_idx, i, r = 0;
				1515
				1516	if (args->flags != 0)
				1517	return -EINVAL;
				1518
				1519	/* Is this guest using storage keys? */
				1520	if (!mm_uses_skeys(current->mm))
				1521	return KVM_S390_GET_SKEYS_NONE;
				1522
				1523	/* Enforce sane limit on memory allocation */
				1524	if (args->count < 1 \|\| args->count > KVM_S390_SKEYS_MAX)
				1525	return -EINVAL;
				1526
				1527	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
				1528	if (!keys)
				1529	return -ENOMEM;
				1530
				1531	down_read(&current->mm->mmap_sem);
				1532	srcu_idx = srcu_read_lock(&kvm->srcu);
				1533	for (i = 0; i < args->count; i++) {
				1534	hva = gfn_to_hva(kvm, args->start_gfn + i);
				1535	if (kvm_is_error_hva(hva)) {
				1536	r = -EFAULT;
				1537	break;
				1538	}
				1539
				1540	r = get_guest_storage_key(current->mm, hva, &keys[i]);
				1541	if (r)
				1542	break;
				1543	}
				1544	srcu_read_unlock(&kvm->srcu, srcu_idx);
				1545	up_read(&current->mm->mmap_sem);
				1546
				1547	if (!r) {
				1548	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				1549	sizeof(uint8_t) * args->count);
				1550	if (r)
				1551	r = -EFAULT;
				1552	}
				1553
				1554	kvfree(keys);
				1555	return r;
				1556	}
				1557
				1558	static long kvm_s390_set_skeys(struct kvm kvm, struct kvm_s390_skeys args)
				1559	{
				1560	uint8_t *keys;
				1561	uint64_t hva;
				1562	int srcu_idx, i, r = 0;
				1563	bool unlocked;
				1564
				1565	if (args->flags != 0)
				1566	return -EINVAL;
				1567
				1568	/* Enforce sane limit on memory allocation */
				1569	if (args->count < 1 \|\| args->count > KVM_S390_SKEYS_MAX)
				1570	return -EINVAL;
				1571
				1572	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
				1573	if (!keys)
				1574	return -ENOMEM;
				1575
				1576	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
				1577	sizeof(uint8_t) * args->count);
				1578	if (r) {
				1579	r = -EFAULT;
				1580	goto out;
				1581	}
				1582
				1583	/* Enable storage key handling for the guest */
				1584	r = s390_enable_skey();
				1585	if (r)
				1586	goto out;
				1587
				1588	i = 0;
				1589	down_read(&current->mm->mmap_sem);
				1590	srcu_idx = srcu_read_lock(&kvm->srcu);
				1591	while (i < args->count) {
				1592	unlocked = false;
				1593	hva = gfn_to_hva(kvm, args->start_gfn + i);
				1594	if (kvm_is_error_hva(hva)) {
				1595	r = -EFAULT;
				1596	break;
				1597	}
				1598
				1599	/* Lowest order bit is reserved */
				1600	if (keys[i] & 0x01) {
				1601	r = -EINVAL;
				1602	break;
				1603	}
				1604
				1605	r = set_guest_storage_key(current->mm, hva, keys[i], 0);
				1606	if (r) {
				1607	r = fixup_user_fault(current, current->mm, hva,
				1608	FAULT_FLAG_WRITE, &unlocked);
				1609	if (r)
				1610	break;
				1611	}
				1612	if (!r)
				1613	i++;
				1614	}
				1615	srcu_read_unlock(&kvm->srcu, srcu_idx);
				1616	up_read(&current->mm->mmap_sem);
				1617	out:
				1618	kvfree(keys);
				1619	return r;
				1620	}
				1621
				1622	/*
				1623	* Base address and length must be sent at the start of each block, therefore
				1624	* it's cheaper to send some clean data, as long as it's less than the size of
				1625	* two longs.
				1626	*/
				1627	#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
				1628	/* for consistency */
				1629	#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
				1630
				1631	/*
				1632	* Similar to gfn_to_memslot, but returns the index of a memslot also when the
				1633	* address falls in a hole. In that case the index of one of the memslots
				1634	* bordering the hole is returned.
				1635	*/
				1636	static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
				1637	{
				1638	int start = 0, end = slots->used_slots;
				1639	int slot = atomic_read(&slots->lru_slot);
				1640	struct kvm_memory_slot *memslots = slots->memslots;
				1641
				1642	if (gfn >= memslots[slot].base_gfn &&
				1643	gfn < memslots[slot].base_gfn + memslots[slot].npages)
				1644	return slot;
				1645
				1646	while (start < end) {
				1647	slot = start + (end - start) / 2;
				1648
				1649	if (gfn >= memslots[slot].base_gfn)
				1650	end = slot;
				1651	else
				1652	start = slot + 1;
				1653	}
				1654
				1655	if (gfn >= memslots[start].base_gfn &&
				1656	gfn < memslots[start].base_gfn + memslots[start].npages) {
				1657	atomic_set(&slots->lru_slot, start);
				1658	}
				1659
				1660	return start;
				1661	}
				1662
				1663	static int kvm_s390_peek_cmma(struct kvm kvm, struct kvm_s390_cmma_log args,
				1664	u8 *res, unsigned long bufsize)
				1665	{
				1666	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
				1667
				1668	args->count = 0;
				1669	while (args->count < bufsize) {
				1670	hva = gfn_to_hva(kvm, cur_gfn);
				1671	/*
				1672	* We return an error if the first value was invalid, but we
				1673	* return successfully if at least one value was copied.
				1674	*/
				1675	if (kvm_is_error_hva(hva))
				1676	return args->count ? 0 : -EFAULT;
				1677	if (get_pgste(kvm->mm, hva, &pgstev) < 0)
				1678	pgstev = 0;
				1679	res[args->count++] = (pgstev >> 24) & 0x43;
				1680	cur_gfn++;
				1681	}
				1682
				1683	return 0;
				1684	}
				1685
				1686	static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
				1687	unsigned long cur_gfn)
				1688	{
				1689	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
				1690	struct kvm_memory_slot *ms = slots->memslots + slotidx;
				1691	unsigned long ofs = cur_gfn - ms->base_gfn;
				1692
				1693	if (ms->base_gfn + ms->npages <= cur_gfn) {
				1694	slotidx--;
				1695	/* If we are above the highest slot, wrap around */
				1696	if (slotidx < 0)
				1697	slotidx = slots->used_slots - 1;
				1698
				1699	ms = slots->memslots + slotidx;
				1700	ofs = 0;
				1701	}
				1702	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
				1703	while ((slotidx > 0) && (ofs >= ms->npages)) {
				1704	slotidx--;
				1705	ms = slots->memslots + slotidx;
				1706	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
				1707	}
				1708	return ms->base_gfn + ofs;
				1709	}
				1710
				1711	static int kvm_s390_get_cmma(struct kvm kvm, struct kvm_s390_cmma_log args,
				1712	u8 *res, unsigned long bufsize)
				1713	{
				1714	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
				1715	struct kvm_memslots *slots = kvm_memslots(kvm);
				1716	struct kvm_memory_slot *ms;
				1717
				1718	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
				1719	ms = gfn_to_memslot(kvm, cur_gfn);
				1720	args->count = 0;
				1721	args->start_gfn = cur_gfn;
				1722	if (!ms)
				1723	return 0;
				1724	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
				1725	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
				1726
				1727	while (args->count < bufsize) {
				1728	hva = gfn_to_hva(kvm, cur_gfn);
				1729	if (kvm_is_error_hva(hva))
				1730	return 0;
				1731	/* Decrement only if we actually flipped the bit to 0 */
				1732	if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
				1733	atomic64_dec(&kvm->arch.cmma_dirty_pages);
				1734	if (get_pgste(kvm->mm, hva, &pgstev) < 0)
				1735	pgstev = 0;
				1736	/* Save the value */
				1737	res[args->count++] = (pgstev >> 24) & 0x43;
				1738	/* If the next bit is too far away, stop. */
				1739	if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
				1740	return 0;
				1741	/* If we reached the previous "next", find the next one */
				1742	if (cur_gfn == next_gfn)
				1743	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
				1744	/* Reached the end of memory or of the buffer, stop */
				1745	if ((next_gfn >= mem_end) \|\|
				1746	(next_gfn - args->start_gfn >= bufsize))
				1747	return 0;
				1748	cur_gfn++;
				1749	/* Reached the end of the current memslot, take the next one. */
				1750	if (cur_gfn - ms->base_gfn >= ms->npages) {
				1751	ms = gfn_to_memslot(kvm, cur_gfn);
				1752	if (!ms)
				1753	return 0;
				1754	}
				1755	}
				1756	return 0;
				1757	}
				1758
				1759	/*
				1760	* This function searches for the next page with dirty CMMA attributes, and
				1761	* saves the attributes in the buffer up to either the end of the buffer or
				1762	* until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
				1763	* no trailing clean bytes are saved.
				1764	* In case no dirty bits were found, or if CMMA was not enabled or used, the
				1765	* output buffer will indicate 0 as length.
				1766	*/
				1767	static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				1768	struct kvm_s390_cmma_log *args)
				1769	{
				1770	unsigned long bufsize;
				1771	int srcu_idx, peek, ret;
				1772	u8 *values;
				1773
				1774	if (!kvm->arch.use_cmma)
				1775	return -ENXIO;
				1776	/* Invalid/unsupported flags were specified */
				1777	if (args->flags & ~KVM_S390_CMMA_PEEK)
				1778	return -EINVAL;
				1779	/* Migration mode query, and we are not doing a migration */
				1780	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
				1781	if (!peek && !kvm->arch.migration_mode)
				1782	return -EINVAL;
				1783	/* CMMA is disabled or was not used, or the buffer has length zero */
				1784	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
				1785	if (!bufsize \|\| !kvm->mm->context.uses_cmm) {
				1786	memset(args, 0, sizeof(*args));
				1787	return 0;
				1788	}
				1789	/* We are not peeking, and there are no dirty pages */
				1790	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
				1791	memset(args, 0, sizeof(*args));
				1792	return 0;
				1793	}
				1794
				1795	values = vmalloc(bufsize);
				1796	if (!values)
				1797	return -ENOMEM;
				1798
				1799	down_read(&kvm->mm->mmap_sem);
				1800	srcu_idx = srcu_read_lock(&kvm->srcu);
				1801	if (peek)
				1802	ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
				1803	else
				1804	ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
				1805	srcu_read_unlock(&kvm->srcu, srcu_idx);
				1806	up_read(&kvm->mm->mmap_sem);
				1807
				1808	if (kvm->arch.migration_mode)
				1809	args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
				1810	else
				1811	args->remaining = 0;
				1812
				1813	if (copy_to_user((void __user *)args->values, values, args->count))
				1814	ret = -EFAULT;
				1815
				1816	vfree(values);
				1817	return ret;
				1818	}
				1819
				1820	/*
				1821	* This function sets the CMMA attributes for the given pages. If the input
				1822	* buffer has zero length, no action is taken, otherwise the attributes are
				1823	* set and the mm->context.uses_cmm flag is set.
				1824	*/
				1825	static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				1826	const struct kvm_s390_cmma_log *args)
				1827	{
				1828	unsigned long hva, mask, pgstev, i;
				1829	uint8_t *bits;
				1830	int srcu_idx, r = 0;
				1831
				1832	mask = args->mask;
				1833
				1834	if (!kvm->arch.use_cmma)
				1835	return -ENXIO;
				1836	/* invalid/unsupported flags */
				1837	if (args->flags != 0)
				1838	return -EINVAL;
				1839	/* Enforce sane limit on memory allocation */
				1840	if (args->count > KVM_S390_CMMA_SIZE_MAX)
				1841	return -EINVAL;
				1842	/* Nothing to do */
				1843	if (args->count == 0)
				1844	return 0;
				1845
				1846	bits = vmalloc(array_size(sizeof(*bits), args->count));
				1847	if (!bits)
				1848	return -ENOMEM;
				1849
				1850	r = copy_from_user(bits, (void __user *)args->values, args->count);
				1851	if (r) {
				1852	r = -EFAULT;
				1853	goto out;
				1854	}
				1855
				1856	down_read(&kvm->mm->mmap_sem);
				1857	srcu_idx = srcu_read_lock(&kvm->srcu);
				1858	for (i = 0; i < args->count; i++) {
				1859	hva = gfn_to_hva(kvm, args->start_gfn + i);
				1860	if (kvm_is_error_hva(hva)) {
				1861	r = -EFAULT;
				1862	break;
				1863	}
				1864
				1865	pgstev = bits[i];
				1866	pgstev = pgstev << 24;
				1867	mask &= _PGSTE_GPS_USAGE_MASK \| _PGSTE_GPS_NODAT;
				1868	set_pgste_bits(kvm->mm, hva, mask, pgstev);
				1869	}
				1870	srcu_read_unlock(&kvm->srcu, srcu_idx);
				1871	up_read(&kvm->mm->mmap_sem);
				1872
				1873	if (!kvm->mm->context.uses_cmm) {
				1874	down_write(&kvm->mm->mmap_sem);
				1875	kvm->mm->context.uses_cmm = 1;
				1876	up_write(&kvm->mm->mmap_sem);
				1877	}
				1878	out:
				1879	vfree(bits);
				1880	return r;
				1881	}
				1882
				1883	long kvm_arch_vm_ioctl(struct file *filp,
				1884	unsigned int ioctl, unsigned long arg)
				1885	{
				1886	struct kvm *kvm = filp->private_data;
				1887	void __user argp = (void __user )arg;
				1888	struct kvm_device_attr attr;
				1889	int r;
				1890
				1891	switch (ioctl) {
				1892	case KVM_S390_INTERRUPT: {
				1893	struct kvm_s390_interrupt s390int;
				1894
				1895	r = -EFAULT;
				1896	if (copy_from_user(&s390int, argp, sizeof(s390int)))
				1897	break;
				1898	r = kvm_s390_inject_vm(kvm, &s390int);
				1899	break;
				1900	}
				1901	case KVM_ENABLE_CAP: {
				1902	struct kvm_enable_cap cap;
				1903	r = -EFAULT;
				1904	if (copy_from_user(&cap, argp, sizeof(cap)))
				1905	break;
				1906	r = kvm_vm_ioctl_enable_cap(kvm, &cap);
				1907	break;
				1908	}
				1909	case KVM_CREATE_IRQCHIP: {
				1910	struct kvm_irq_routing_entry routing;
				1911
				1912	r = -EINVAL;
				1913	if (kvm->arch.use_irqchip) {
				1914	/* Set up dummy routing. */
				1915	memset(&routing, 0, sizeof(routing));
				1916	r = kvm_set_irq_routing(kvm, &routing, 0, 0);
				1917	}
				1918	break;
				1919	}
				1920	case KVM_SET_DEVICE_ATTR: {
				1921	r = -EFAULT;
				1922	if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
				1923	break;
				1924	r = kvm_s390_vm_set_attr(kvm, &attr);
				1925	break;
				1926	}
				1927	case KVM_GET_DEVICE_ATTR: {
				1928	r = -EFAULT;
				1929	if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
				1930	break;
				1931	r = kvm_s390_vm_get_attr(kvm, &attr);
				1932	break;
				1933	}
				1934	case KVM_HAS_DEVICE_ATTR: {
				1935	r = -EFAULT;
				1936	if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
				1937	break;
				1938	r = kvm_s390_vm_has_attr(kvm, &attr);
				1939	break;
				1940	}
				1941	case KVM_S390_GET_SKEYS: {
				1942	struct kvm_s390_skeys args;
				1943
				1944	r = -EFAULT;
				1945	if (copy_from_user(&args, argp,
				1946	sizeof(struct kvm_s390_skeys)))
				1947	break;
				1948	r = kvm_s390_get_skeys(kvm, &args);
				1949	break;
				1950	}
				1951	case KVM_S390_SET_SKEYS: {
				1952	struct kvm_s390_skeys args;
				1953
				1954	r = -EFAULT;
				1955	if (copy_from_user(&args, argp,
				1956	sizeof(struct kvm_s390_skeys)))
				1957	break;
				1958	r = kvm_s390_set_skeys(kvm, &args);
				1959	break;
				1960	}
				1961	case KVM_S390_GET_CMMA_BITS: {
				1962	struct kvm_s390_cmma_log args;
				1963
				1964	r = -EFAULT;
				1965	if (copy_from_user(&args, argp, sizeof(args)))
				1966	break;
				1967	mutex_lock(&kvm->slots_lock);
				1968	r = kvm_s390_get_cmma_bits(kvm, &args);
				1969	mutex_unlock(&kvm->slots_lock);
				1970	if (!r) {
				1971	r = copy_to_user(argp, &args, sizeof(args));
				1972	if (r)
				1973	r = -EFAULT;
				1974	}
				1975	break;
				1976	}
				1977	case KVM_S390_SET_CMMA_BITS: {
				1978	struct kvm_s390_cmma_log args;
				1979
				1980	r = -EFAULT;
				1981	if (copy_from_user(&args, argp, sizeof(args)))
				1982	break;
				1983	mutex_lock(&kvm->slots_lock);
				1984	r = kvm_s390_set_cmma_bits(kvm, &args);
				1985	mutex_unlock(&kvm->slots_lock);
				1986	break;
				1987	}
				1988	default:
				1989	r = -ENOTTY;
				1990	}
				1991
				1992	return r;
				1993	}
				1994
				1995	static int kvm_s390_query_ap_config(u8 *config)
				1996	{
				1997	u32 fcn_code = 0x04000000UL;
				1998	u32 cc = 0;
				1999
				2000	memset(config, 0, 128);
				2001	asm volatile(
				2002	"lgr 0,%1\n"
				2003	"lgr 2,%2\n"
				2004	".long 0xb2af0000\n" /* PQAP(QCI) */
				2005	"0: ipm %0\n"
				2006	"srl %0,28\n"
				2007	"1:\n"
				2008	EX_TABLE(0b, 1b)
				2009	: "+r" (cc)
				2010	: "r" (fcn_code), "r" (config)
				2011	: "cc", "0", "2", "memory"
				2012	);
				2013
				2014	return cc;
				2015	}
				2016
				2017	static int kvm_s390_apxa_installed(void)
				2018	{
				2019	u8 config[128];
				2020	int cc;
				2021
				2022	if (test_facility(12)) {
				2023	cc = kvm_s390_query_ap_config(config);
				2024
				2025	if (cc)
				2026	pr_err("PQAP(QCI) failed with cc=%d", cc);
				2027	else
				2028	return config[0] & 0x40;
				2029	}
				2030
				2031	return 0;
				2032	}
				2033
				2034	static void kvm_s390_set_crycb_format(struct kvm *kvm)
				2035	{
				2036	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
				2037
				2038	if (kvm_s390_apxa_installed())
				2039	kvm->arch.crypto.crycbd \|= CRYCB_FORMAT2;
				2040	else
				2041	kvm->arch.crypto.crycbd \|= CRYCB_FORMAT1;
				2042	}
				2043
				2044	static u64 kvm_s390_get_initial_cpuid(void)
				2045	{
				2046	struct cpuid cpuid;
				2047
				2048	get_cpu_id(&cpuid);
				2049	cpuid.version = 0xff;
				2050	return ((u64 ) &cpuid);
				2051	}
				2052
				2053	static void kvm_s390_crypto_init(struct kvm *kvm)
				2054	{
				2055	if (!test_kvm_facility(kvm, 76))
				2056	return;
				2057
				2058	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
				2059	kvm_s390_set_crycb_format(kvm);
				2060
				2061	/* Enable AES/DEA protected key functions by default */
				2062	kvm->arch.crypto.aes_kw = 1;
				2063	kvm->arch.crypto.dea_kw = 1;
				2064	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
				2065	sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
				2066	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
				2067	sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
				2068	}
				2069
				2070	static void sca_dispose(struct kvm *kvm)
				2071	{
				2072	if (kvm->arch.use_esca)
				2073	free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
				2074	else
				2075	free_page((unsigned long)(kvm->arch.sca));
				2076	kvm->arch.sca = NULL;
				2077	}
				2078
				2079	int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
				2080	{
				2081	gfp_t alloc_flags = GFP_KERNEL;
				2082	int i, rc;
				2083	char debug_name[16];
				2084	static unsigned long sca_offset;
				2085
				2086	rc = -EINVAL;
				2087	#ifdef CONFIG_KVM_S390_UCONTROL
				2088	if (type & ~KVM_VM_S390_UCONTROL)
				2089	goto out_err;
				2090	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
				2091	goto out_err;
				2092	#else
				2093	if (type)
				2094	goto out_err;
				2095	#endif
				2096
				2097	rc = s390_enable_sie();
				2098	if (rc)
				2099	goto out_err;
				2100
				2101	rc = -ENOMEM;
				2102
				2103	if (!sclp.has_64bscao)
				2104	alloc_flags \|= GFP_DMA;
				2105	rwlock_init(&kvm->arch.sca_lock);
				2106	/* start with basic SCA */
				2107	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
				2108	if (!kvm->arch.sca)
				2109	goto out_err;
				2110	spin_lock(&kvm_lock);
				2111	sca_offset += 16;
				2112	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
				2113	sca_offset = 0;
				2114	kvm->arch.sca = (struct bsca_block *)
				2115	((char *) kvm->arch.sca + sca_offset);
				2116	spin_unlock(&kvm_lock);
				2117
				2118	sprintf(debug_name, "kvm-%u", current->pid);
				2119
				2120	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
				2121	if (!kvm->arch.dbf)
				2122	goto out_err;
				2123
				2124	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
				2125	kvm->arch.sie_page2 =
				2126	(struct sie_page2 *) get_zeroed_page(GFP_KERNEL \| GFP_DMA);
				2127	if (!kvm->arch.sie_page2)
				2128	goto out_err;
				2129
				2130	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
				2131
				2132	for (i = 0; i < kvm_s390_fac_size(); i++) {
				2133	kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
				2134	(kvm_s390_fac_base[i] \|
				2135	kvm_s390_fac_ext[i]);
				2136	kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
				2137	kvm_s390_fac_base[i];
				2138	}
				2139
				2140	/* we are always in czam mode - even on pre z14 machines */
				2141	set_kvm_facility(kvm->arch.model.fac_mask, 138);
				2142	set_kvm_facility(kvm->arch.model.fac_list, 138);
				2143	/* we emulate STHYI in kvm */
				2144	set_kvm_facility(kvm->arch.model.fac_mask, 74);
				2145	set_kvm_facility(kvm->arch.model.fac_list, 74);
				2146	if (MACHINE_HAS_TLB_GUEST) {
				2147	set_kvm_facility(kvm->arch.model.fac_mask, 147);
				2148	set_kvm_facility(kvm->arch.model.fac_list, 147);
				2149	}
				2150
				2151	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
				2152	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
				2153
				2154	kvm_s390_crypto_init(kvm);
				2155
				2156	mutex_init(&kvm->arch.float_int.ais_lock);
				2157	spin_lock_init(&kvm->arch.float_int.lock);
				2158	for (i = 0; i < FIRQ_LIST_COUNT; i++)
				2159	INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
				2160	init_waitqueue_head(&kvm->arch.ipte_wq);
				2161	mutex_init(&kvm->arch.ipte_mutex);
				2162
				2163	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
				2164	VM_EVENT(kvm, 3, "vm created with type %lu", type);
				2165
				2166	if (type & KVM_VM_S390_UCONTROL) {
				2167	kvm->arch.gmap = NULL;
				2168	kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
				2169	} else {
				2170	if (sclp.hamax == U64_MAX)
				2171	kvm->arch.mem_limit = TASK_SIZE_MAX;
				2172	else
				2173	kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
				2174	sclp.hamax + 1);
				2175	kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
				2176	if (!kvm->arch.gmap)
				2177	goto out_err;
				2178	kvm->arch.gmap->private = kvm;
				2179	kvm->arch.gmap->pfault_enabled = 0;
				2180	}
				2181
				2182	kvm->arch.use_pfmfi = sclp.has_pfmfi;
				2183	kvm->arch.use_skf = sclp.has_skey;
				2184	spin_lock_init(&kvm->arch.start_stop_lock);
				2185	kvm_s390_vsie_init(kvm);
				2186	kvm_s390_gisa_init(kvm);
				2187	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
				2188
				2189	return 0;
				2190	out_err:
				2191	free_page((unsigned long)kvm->arch.sie_page2);
				2192	debug_unregister(kvm->arch.dbf);
				2193	sca_dispose(kvm);
				2194	KVM_EVENT(3, "creation of vm failed: %d", rc);
				2195	return rc;
				2196	}
				2197
				/* This architecture reports that it has no per-vcpu debugfs entries. */
				bool kvm_arch_has_vcpu_debugfs(void)
				{
					return false;
				}
				2202
				/* Nothing to create; @vcpu is unused and 0 is always returned. */
				int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
				{
					return 0;
				}
				2207
				2208	void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
				2209	{
				2210	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
				2211	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
				2212	kvm_s390_clear_local_irqs(vcpu);
				2213	kvm_clear_async_pf_completion_queue(vcpu);
				2214	if (!kvm_is_ucontrol(vcpu->kvm))
				2215	sca_del_vcpu(vcpu);
				2216
				2217	if (kvm_is_ucontrol(vcpu->kvm))
				2218	gmap_remove(vcpu->arch.gmap);
				2219
				2220	if (vcpu->kvm->arch.use_cmma)
				2221	kvm_s390_vcpu_unsetup_cmma(vcpu);
				2222	free_page((unsigned long)(vcpu->arch.sie_block));
				2223
				2224	kvm_vcpu_uninit(vcpu);
				2225	kmem_cache_free(kvm_vcpu_cache, vcpu);
				2226	}
				2227
				2228	static void kvm_free_vcpus(struct kvm *kvm)
				2229	{
				2230	unsigned int i;
				2231	struct kvm_vcpu *vcpu;
				2232
				2233	kvm_for_each_vcpu(i, vcpu, kvm)
				2234	kvm_arch_vcpu_destroy(vcpu);
				2235
				2236	mutex_lock(&kvm->lock);
				2237	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
				2238	kvm->vcpus[i] = NULL;
				2239
				2240	atomic_set(&kvm->online_vcpus, 0);
				2241	mutex_unlock(&kvm->lock);
				2242	}
				2243
				2244	void kvm_arch_destroy_vm(struct kvm *kvm)
				2245	{
				2246	kvm_free_vcpus(kvm);
				2247	sca_dispose(kvm);
				2248	debug_unregister(kvm->arch.dbf);
				2249	kvm_s390_gisa_destroy(kvm);
				2250	free_page((unsigned long)kvm->arch.sie_page2);
				2251	if (!kvm_is_ucontrol(kvm))
				2252	gmap_remove(kvm->arch.gmap);
				2253	kvm_s390_destroy_adapters(kvm);
				2254	kvm_s390_clear_float_irqs(kvm);
				2255	kvm_s390_vsie_destroy(kvm);
				2256	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
				2257	}
				2258
				2259	/* Section: vcpu related */
				2260	static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
				2261	{
				2262	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
				2263	if (!vcpu->arch.gmap)
				2264	return -ENOMEM;
				2265	vcpu->arch.gmap->private = vcpu->kvm;
				2266
				2267	return 0;
				2268	}
				2269
				2270	static void sca_del_vcpu(struct kvm_vcpu *vcpu)
				2271	{
				2272	if (!kvm_s390_use_sca_entries())
				2273	return;
				2274	read_lock(&vcpu->kvm->arch.sca_lock);
				2275	if (vcpu->kvm->arch.use_esca) {
				2276	struct esca_block *sca = vcpu->kvm->arch.sca;
				2277
				2278	clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
				2279	sca->cpu[vcpu->vcpu_id].sda = 0;
				2280	} else {
				2281	struct bsca_block *sca = vcpu->kvm->arch.sca;
				2282
				2283	clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
				2284	sca->cpu[vcpu->vcpu_id].sda = 0;
				2285	}
				2286	read_unlock(&vcpu->kvm->arch.sca_lock);
				2287	}
				2288
				2289	static void sca_add_vcpu(struct kvm_vcpu *vcpu)
				2290	{
				2291	if (!kvm_s390_use_sca_entries()) {
				2292	struct bsca_block *sca = vcpu->kvm->arch.sca;
				2293
				2294	/* we still need the basic sca for the ipte control */
				2295	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
				2296	vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
				2297	return;
				2298	}
				2299	read_lock(&vcpu->kvm->arch.sca_lock);
				2300	if (vcpu->kvm->arch.use_esca) {
				2301	struct esca_block *sca = vcpu->kvm->arch.sca;
				2302
				2303	sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
				2304	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
				2305	vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
				2306	vcpu->arch.sie_block->ecb2 \|= ECB2_ESCA;
				2307	set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
				2308	} else {
				2309	struct bsca_block *sca = vcpu->kvm->arch.sca;
				2310
				2311	sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
				2312	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
				2313	vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
				2314	set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
				2315	}
				2316	read_unlock(&vcpu->kvm->arch.sca_lock);
				2317	}
				2318
				2319	/* Basic SCA to Extended SCA data copy routines */
				2320	static inline void sca_copy_entry(struct esca_entry d, struct bsca_entry s)
				2321	{
				2322	d->sda = s->sda;
				2323	d->sigp_ctrl.c = s->sigp_ctrl.c;
				2324	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
				2325	}
				2326
				2327	static void sca_copy_b_to_e(struct esca_block d, struct bsca_block s)
				2328	{
				2329	int i;
				2330
				2331	d->ipte_control = s->ipte_control;
				2332	d->mcn[0] = s->mcn;
				2333	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
				2334	sca_copy_entry(&d->cpu[i], &s->cpu[i]);
				2335	}
				2336
				2337	static int sca_switch_to_extended(struct kvm *kvm)
				2338	{
				2339	struct bsca_block *old_sca = kvm->arch.sca;
				2340	struct esca_block *new_sca;
				2341	struct kvm_vcpu *vcpu;
				2342	unsigned int vcpu_idx;
				2343	u32 scaol, scaoh;
				2344
				2345	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL\|__GFP_ZERO);
				2346	if (!new_sca)
				2347	return -ENOMEM;
				2348
				2349	scaoh = (u32)((u64)(new_sca) >> 32);
				2350	scaol = (u32)(u64)(new_sca) & ~0x3fU;
				2351
				2352	kvm_s390_vcpu_block_all(kvm);
				2353	write_lock(&kvm->arch.sca_lock);
				2354
				2355	sca_copy_b_to_e(new_sca, old_sca);
				2356
				2357	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
				2358	vcpu->arch.sie_block->scaoh = scaoh;
				2359	vcpu->arch.sie_block->scaol = scaol;
				2360	vcpu->arch.sie_block->ecb2 \|= ECB2_ESCA;
				2361	}
				2362	kvm->arch.sca = new_sca;
				2363	kvm->arch.use_esca = 1;
				2364
				2365	write_unlock(&kvm->arch.sca_lock);
				2366	kvm_s390_vcpu_unblock_all(kvm);
				2367
				2368	free_page((unsigned long)old_sca);
				2369
				2370	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
				2371	old_sca, kvm->arch.sca);
				2372	return 0;
				2373	}
				2374
				2375	static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
				2376	{
				2377	int rc;
				2378
				2379	if (!kvm_s390_use_sca_entries()) {
				2380	if (id < KVM_MAX_VCPUS)
				2381	return true;
				2382	return false;
				2383	}
				2384	if (id < KVM_S390_BSCA_CPU_SLOTS)
				2385	return true;
				2386	if (!sclp.has_esca \|\| !sclp.has_64bscao)
				2387	return false;
				2388
				2389	mutex_lock(&kvm->lock);
				2390	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
				2391	mutex_unlock(&kvm->lock);
				2392
				2393	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
				2394	}
				2395
				2396	int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
				2397	{
				2398	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
				2399	kvm_clear_async_pf_completion_queue(vcpu);
				2400	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX \|
				2401	KVM_SYNC_GPRS \|
				2402	KVM_SYNC_ACRS \|
				2403	KVM_SYNC_CRS \|
				2404	KVM_SYNC_ARCH0 \|
				2405	KVM_SYNC_PFAULT;
				2406	kvm_s390_set_prefix(vcpu, 0);
				2407	if (test_kvm_facility(vcpu->kvm, 64))
				2408	vcpu->run->kvm_valid_regs \|= KVM_SYNC_RICCB;
				2409	if (test_kvm_facility(vcpu->kvm, 82))
				2410	vcpu->run->kvm_valid_regs \|= KVM_SYNC_BPBC;
				2411	if (test_kvm_facility(vcpu->kvm, 133))
				2412	vcpu->run->kvm_valid_regs \|= KVM_SYNC_GSCB;
				2413	if (test_kvm_facility(vcpu->kvm, 156))
				2414	vcpu->run->kvm_valid_regs \|= KVM_SYNC_ETOKEN;
				2415	/* fprs can be synchronized via vrs, even if the guest has no vx. With
				2416	* MACHINE_HAS_VX, (load\|store)_fpu_regs() will work with vrs format.
				2417	*/
				2418	if (MACHINE_HAS_VX)
				2419	vcpu->run->kvm_valid_regs \|= KVM_SYNC_VRS;
				2420	else
				2421	vcpu->run->kvm_valid_regs \|= KVM_SYNC_FPRS;
				2422
				2423	if (kvm_is_ucontrol(vcpu->kvm))
				2424	return __kvm_ucontrol_vcpu_init(vcpu);
				2425
				2426	return 0;
				2427	}
				2428
				2429	/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
				2430	static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
				2431	{
				2432	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
				2433	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
				2434	vcpu->arch.cputm_start = get_tod_clock_fast();
				2435	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
				2436	}
				2437
				2438	/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
				2439	static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
				2440	{
				2441	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
				2442	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
				2443	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
				2444	vcpu->arch.cputm_start = 0;
				2445	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
				2446	}
				2447
				2448	/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
				2449	static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
				2450	{
				2451	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
				2452	vcpu->arch.cputm_enabled = true;
				2453	__start_cpu_timer_accounting(vcpu);
				2454	}
				2455
				2456	/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
				2457	static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
				2458	{
				2459	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
				2460	__stop_cpu_timer_accounting(vcpu);
				2461	vcpu->arch.cputm_enabled = false;
				2462	}
				2463
				/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
				static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
				{
					/* protect from TOD sync and vcpu_load/put */
					preempt_disable();
					__enable_cpu_timer_accounting(vcpu);
					preempt_enable();
				}
				2470
				/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
				static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
				{
					/* protect from TOD sync and vcpu_load/put */
					preempt_disable();
					__disable_cpu_timer_accounting(vcpu);
					preempt_enable();
				}
				2477
				2478	/* set the cpu timer - may only be called from the VCPU thread itself */
				2479	void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
				2480	{
				2481	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
				2482	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
				2483	if (vcpu->arch.cputm_enabled)
				2484	vcpu->arch.cputm_start = get_tod_clock_fast();
				2485	vcpu->arch.sie_block->cputm = cputm;
				2486	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
				2487	preempt_enable();
				2488	}
				2489
				2490	/* update and get the cpu timer - can also be called from other VCPU threads */
				2491	__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
				2492	{
				2493	unsigned int seq;
				2494	__u64 value;
				2495
				2496	if (unlikely(!vcpu->arch.cputm_enabled))
				2497	return vcpu->arch.sie_block->cputm;
				2498
				2499	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
				2500	do {
				2501	seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
				2502	/*
				2503	* If the writer would ever execute a read in the critical
				2504	* section, e.g. in irq context, we have a deadlock.
				2505	*/
				2506	WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
				2507	value = vcpu->arch.sie_block->cputm;
				2508	/* if cputm_start is 0, accounting is being started/stopped */
				2509	if (likely(vcpu->arch.cputm_start))
				2510	value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
				2511	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
				2512	preempt_enable();
				2513	return value;
				2514	}
				2515
				2516	void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
				2517	{
				2518
				2519	gmap_enable(vcpu->arch.enabled_gmap);
				2520	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
				2521	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
				2522	__start_cpu_timer_accounting(vcpu);
				2523	vcpu->cpu = cpu;
				2524	}
				2525
				2526	void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
				2527	{
				2528	vcpu->cpu = -1;
				2529	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
				2530	__stop_cpu_timer_accounting(vcpu);
				2531	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
				2532	vcpu->arch.enabled_gmap = gmap_get_enabled();
				2533	gmap_disable(vcpu->arch.enabled_gmap);
				2534
				2535	}
				2536
				2537	static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
				2538	{
				2539	/* this equals initial cpu reset in pop, but we don't switch to ESA */
				2540	vcpu->arch.sie_block->gpsw.mask = 0UL;
				2541	vcpu->arch.sie_block->gpsw.addr = 0UL;
				2542	kvm_s390_set_prefix(vcpu, 0);
				2543	kvm_s390_set_cpu_timer(vcpu, 0);
				2544	vcpu->arch.sie_block->ckc = 0UL;
				2545	vcpu->arch.sie_block->todpr = 0;
				2546	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
				2547	vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 \|
				2548	CR0_INTERRUPT_KEY_SUBMASK \|
				2549	CR0_MEASUREMENT_ALERT_SUBMASK;
				2550	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 \|
				2551	CR14_UNUSED_33 \|
				2552	CR14_EXTERNAL_DAMAGE_SUBMASK;
				2553	/* make sure the new fpc will be lazily loaded */
				2554	save_fpu_regs();
				2555	current->thread.fpu.fpc = 0;
				2556	vcpu->arch.sie_block->gbea = 1;
				2557	vcpu->arch.sie_block->pp = 0;
				2558	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
				2559	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
				2560	kvm_clear_async_pf_completion_queue(vcpu);
				2561	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
				2562	kvm_s390_vcpu_stop(vcpu);
				2563	kvm_s390_clear_local_irqs(vcpu);
				2564	}
				2565
				2566	void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
				2567	{
				2568	mutex_lock(&vcpu->kvm->lock);
				2569	preempt_disable();
				2570	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
				2571	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
				2572	preempt_enable();
				2573	mutex_unlock(&vcpu->kvm->lock);
				2574	if (!kvm_is_ucontrol(vcpu->kvm)) {
				2575	vcpu->arch.gmap = vcpu->kvm->arch.gmap;
				2576	sca_add_vcpu(vcpu);
				2577	}
				2578	if (test_kvm_facility(vcpu->kvm, 74) \|\| vcpu->kvm->arch.user_instr0)
				2579	vcpu->arch.sie_block->ictl \|= ICTL_OPEREXC;
				2580	/* make vcpu_load load the right gmap on the first trigger */
				2581	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
				2582	}
				2583
				2584	static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
				2585	{
				2586	if (!test_kvm_facility(vcpu->kvm, 76))
				2587	return;
				2588
				2589	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES \| ECB3_DEA);
				2590
				2591	if (vcpu->kvm->arch.crypto.aes_kw)
				2592	vcpu->arch.sie_block->ecb3 \|= ECB3_AES;
				2593	if (vcpu->kvm->arch.crypto.dea_kw)
				2594	vcpu->arch.sie_block->ecb3 \|= ECB3_DEA;
				2595
				2596	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
				2597	}
				2598
				2599	void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
				2600	{
				2601	free_page(vcpu->arch.sie_block->cbrlo);
				2602	vcpu->arch.sie_block->cbrlo = 0;
				2603	}
				2604
				2605	int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
				2606	{
				2607	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
				2608	if (!vcpu->arch.sie_block->cbrlo)
				2609	return -ENOMEM;
				2610	return 0;
				2611	}
				2612
				2613	static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
				2614	{
				2615	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
				2616
				2617	vcpu->arch.sie_block->ibc = model->ibc;
				2618	if (test_kvm_facility(vcpu->kvm, 7))
				2619	vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
				2620	}
				2621
				2622	int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
				2623	{
				2624	int rc = 0;
				2625
				2626	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH \|
				2627	CPUSTAT_SM \|
				2628	CPUSTAT_STOPPED);
				2629
				2630	if (test_kvm_facility(vcpu->kvm, 78))
				2631	kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
				2632	else if (test_kvm_facility(vcpu->kvm, 8))
				2633	kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
				2634
				2635	kvm_s390_vcpu_setup_model(vcpu);
				2636
				2637	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
				2638	if (MACHINE_HAS_ESOP)
				2639	vcpu->arch.sie_block->ecb \|= ECB_HOSTPROTINT;
				2640	if (test_kvm_facility(vcpu->kvm, 9))
				2641	vcpu->arch.sie_block->ecb \|= ECB_SRSI;
				2642	if (test_kvm_facility(vcpu->kvm, 73))
				2643	vcpu->arch.sie_block->ecb \|= ECB_TE;
				2644
				2645	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
				2646	vcpu->arch.sie_block->ecb2 \|= ECB2_PFMFI;
				2647	if (test_kvm_facility(vcpu->kvm, 130))
				2648	vcpu->arch.sie_block->ecb2 \|= ECB2_IEP;
				2649	vcpu->arch.sie_block->eca = ECA_MVPGI \| ECA_PROTEXCI;
				2650	if (sclp.has_cei)
				2651	vcpu->arch.sie_block->eca \|= ECA_CEI;
				2652	if (sclp.has_ib)
				2653	vcpu->arch.sie_block->eca \|= ECA_IB;
				2654	if (sclp.has_siif)
				2655	vcpu->arch.sie_block->eca \|= ECA_SII;
				2656	if (sclp.has_sigpif)
				2657	vcpu->arch.sie_block->eca \|= ECA_SIGPI;
				2658	if (test_kvm_facility(vcpu->kvm, 129)) {
				2659	vcpu->arch.sie_block->eca \|= ECA_VX;
				2660	vcpu->arch.sie_block->ecd \|= ECD_HOSTREGMGMT;
				2661	}
				2662	if (test_kvm_facility(vcpu->kvm, 139))
				2663	vcpu->arch.sie_block->ecd \|= ECD_MEF;
				2664	if (test_kvm_facility(vcpu->kvm, 156))
				2665	vcpu->arch.sie_block->ecd \|= ECD_ETOKENF;
				2666	if (vcpu->arch.sie_block->gd) {
				2667	vcpu->arch.sie_block->eca \|= ECA_AIV;
				2668	VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
				2669	vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
				2670	}
				2671	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
				2672	\| SDNXC;
				2673	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
				2674
				2675	if (sclp.has_kss)
				2676	kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
				2677	else
				2678	vcpu->arch.sie_block->ictl \|= ICTL_ISKE \| ICTL_SSKE \| ICTL_RRBE;
				2679
				2680	if (vcpu->kvm->arch.use_cmma) {
				2681	rc = kvm_s390_vcpu_setup_cmma(vcpu);
				2682	if (rc)
				2683	return rc;
				2684	}
				2685	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
				2686	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
				2687
				2688	kvm_s390_vcpu_crypto_setup(vcpu);
				2689
				2690	return rc;
				2691	}
				2692
				2693	struct kvm_vcpu kvm_arch_vcpu_create(struct kvm kvm,
				2694	unsigned int id)
				2695	{
				2696	struct kvm_vcpu *vcpu;
				2697	struct sie_page *sie_page;
				2698	int rc = -EINVAL;
				2699
				2700	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
				2701	goto out;
				2702
				2703	rc = -ENOMEM;
				2704
				2705	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
				2706	if (!vcpu)
				2707	goto out;
				2708
				2709	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
				2710	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
				2711	if (!sie_page)
				2712	goto out_free_cpu;
				2713
				2714	vcpu->arch.sie_block = &sie_page->sie_block;
				2715	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
				2716
				2717	/* the real guest size will always be smaller than msl */
				2718	vcpu->arch.sie_block->mso = 0;
				2719	vcpu->arch.sie_block->msl = sclp.hamax;
				2720
				2721	vcpu->arch.sie_block->icpua = id;
				2722	spin_lock_init(&vcpu->arch.local_int.lock);
				2723	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
				2724	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
				2725	vcpu->arch.sie_block->gd \|= GISA_FORMAT1;
				2726	seqcount_init(&vcpu->arch.cputm_seqcount);
				2727
				2728	rc = kvm_vcpu_init(vcpu, kvm, id);
				2729	if (rc)
				2730	goto out_free_sie_block;
				2731	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
				2732	vcpu->arch.sie_block);
				2733	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
				2734
				2735	return vcpu;
				2736	out_free_sie_block:
				2737	free_page((unsigned long)(vcpu->arch.sie_block));
				2738	out_free_cpu:
				2739	kmem_cache_free(kvm_vcpu_cache, vcpu);
				2740	out:
				2741	return ERR_PTR(rc);
				2742	}
				2743
				/* A vcpu is runnable when kvm_s390_vcpu_has_irq(vcpu, 0) reports true. */
				int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
				{
					int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

					return has_irq;
				}
				2748
				2749	bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
				2750	{
				2751	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
				2752	}
				2753
				2754	void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
				2755	{
				2756	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
				2757	exit_sie(vcpu);
				2758	}
				2759
				2760	void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
				2761	{
				2762	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
				2763	}
				2764
				2765	static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
				2766	{
				2767	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
				2768	exit_sie(vcpu);
				2769	}
				2770
				2771	static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
				2772	{
				2773	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
				2774	}
				2775
				2776	/*
				2777	* Kick a guest cpu out of SIE and wait until SIE is not running.
				2778	* If the CPU is not running (e.g. waiting as idle) the function will
				2779	* return immediately. */
				2780	void exit_sie(struct kvm_vcpu *vcpu)
				2781	{
				2782	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
				2783	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
				2784	cpu_relax();
				2785	}
				2786
				/*
				 * Post request @req on @vcpu and kick the guest cpu out of SIE so that the
				 * request is processed synchronously.
				 */
				void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
				{
					kvm_make_request(req, vcpu);
					kvm_s390_vcpu_request(vcpu);
				}
				2793
				2794	static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
				2795	unsigned long end)
				2796	{
				2797	struct kvm *kvm = gmap->private;
				2798	struct kvm_vcpu *vcpu;
				2799	unsigned long prefix;
				2800	int i;
				2801
				2802	if (gmap_is_shadow(gmap))
				2803	return;
				2804	if (start >= 1UL << 31)
				2805	/* We are only interested in prefix pages */
				2806	return;
				2807	kvm_for_each_vcpu(i, vcpu, kvm) {
				2808	/* match against both prefix pages */
				2809	prefix = kvm_s390_get_prefix(vcpu);
				2810	if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
				2811	VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				2812	start, end);
				2813	kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
				2814	}
				2815	}
				2816	}
				2817
				/*
				 * Only exists because common kvm code references the symbol; it is
				 * never expected to be called, hence the BUG().
				 */
				int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
				{
					BUG();
					return 0;
				}
				2824
				2825	static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
				2826	struct kvm_one_reg *reg)
				2827	{
				2828	int r = -EINVAL;
				2829
				2830	switch (reg->id) {
				2831	case KVM_REG_S390_TODPR:
				2832	r = put_user(vcpu->arch.sie_block->todpr,
				2833	(u32 __user *)reg->addr);
				2834	break;
				2835	case KVM_REG_S390_EPOCHDIFF:
				2836	r = put_user(vcpu->arch.sie_block->epoch,
				2837	(u64 __user *)reg->addr);
				2838	break;
				2839	case KVM_REG_S390_CPU_TIMER:
				2840	r = put_user(kvm_s390_get_cpu_timer(vcpu),
				2841	(u64 __user *)reg->addr);
				2842	break;
				2843	case KVM_REG_S390_CLOCK_COMP:
				2844	r = put_user(vcpu->arch.sie_block->ckc,
				2845	(u64 __user *)reg->addr);
				2846	break;
				2847	case KVM_REG_S390_PFTOKEN:
				2848	r = put_user(vcpu->arch.pfault_token,
				2849	(u64 __user *)reg->addr);
				2850	break;
				2851	case KVM_REG_S390_PFCOMPARE:
				2852	r = put_user(vcpu->arch.pfault_compare,
				2853	(u64 __user *)reg->addr);
				2854	break;
				2855	case KVM_REG_S390_PFSELECT:
				2856	r = put_user(vcpu->arch.pfault_select,
				2857	(u64 __user *)reg->addr);
				2858	break;
				2859	case KVM_REG_S390_PP:
				2860	r = put_user(vcpu->arch.sie_block->pp,
				2861	(u64 __user *)reg->addr);
				2862	break;
				2863	case KVM_REG_S390_GBEA:
				2864	r = put_user(vcpu->arch.sie_block->gbea,
				2865	(u64 __user *)reg->addr);
				2866	break;
				2867	default:
				2868	break;
				2869	}
				2870
				2871	return r;
				2872	}
				2873
				2874	static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
				2875	struct kvm_one_reg *reg)
				2876	{
				2877	int r = -EINVAL;
				2878	__u64 val;
				2879
				2880	switch (reg->id) {
				2881	case KVM_REG_S390_TODPR:
				2882	r = get_user(vcpu->arch.sie_block->todpr,
				2883	(u32 __user *)reg->addr);
				2884	break;
				2885	case KVM_REG_S390_EPOCHDIFF:
				2886	r = get_user(vcpu->arch.sie_block->epoch,
				2887	(u64 __user *)reg->addr);
				2888	break;
				2889	case KVM_REG_S390_CPU_TIMER:
				2890	r = get_user(val, (u64 __user *)reg->addr);
				2891	if (!r)
				2892	kvm_s390_set_cpu_timer(vcpu, val);
				2893	break;
				2894	case KVM_REG_S390_CLOCK_COMP:
				2895	r = get_user(vcpu->arch.sie_block->ckc,
				2896	(u64 __user *)reg->addr);
				2897	break;
				2898	case KVM_REG_S390_PFTOKEN:
				2899	r = get_user(vcpu->arch.pfault_token,
				2900	(u64 __user *)reg->addr);
				2901	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
				2902	kvm_clear_async_pf_completion_queue(vcpu);
				2903	break;
				2904	case KVM_REG_S390_PFCOMPARE:
				2905	r = get_user(vcpu->arch.pfault_compare,
				2906	(u64 __user *)reg->addr);
				2907	break;
				2908	case KVM_REG_S390_PFSELECT:
				2909	r = get_user(vcpu->arch.pfault_select,
				2910	(u64 __user *)reg->addr);
				2911	break;
				2912	case KVM_REG_S390_PP:
				2913	r = get_user(vcpu->arch.sie_block->pp,
				2914	(u64 __user *)reg->addr);
				2915	break;
				2916	case KVM_REG_S390_GBEA:
				2917	r = get_user(vcpu->arch.sie_block->gbea,
				2918	(u64 __user *)reg->addr);
				2919	break;
				2920	default:
				2921	break;
				2922	}
				2923
				2924	return r;
				2925	}
				2926
				/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
				static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
				{
					kvm_s390_vcpu_initial_reset(vcpu);

					return 0;
				}
				2932
				2933	int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu vcpu, struct kvm_regs regs)
				2934	{
				2935	vcpu_load(vcpu);
				2936	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
				2937	vcpu_put(vcpu);
				2938	return 0;
				2939	}
				2940
				2941	int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu vcpu, struct kvm_regs regs)
				2942	{
				2943	vcpu_load(vcpu);
				2944	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
				2945	vcpu_put(vcpu);
				2946	return 0;
				2947	}
				2948
				2949	int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				2950	struct kvm_sregs *sregs)
				2951	{
				2952	vcpu_load(vcpu);
				2953
				2954	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
				2955	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
				2956
				2957	vcpu_put(vcpu);
				2958	return 0;
				2959	}
				2960
				2961	int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				2962	struct kvm_sregs *sregs)
				2963	{
				2964	vcpu_load(vcpu);
				2965
				2966	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
				2967	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
				2968
				2969	vcpu_put(vcpu);
				2970	return 0;
				2971	}
				2972
				2973	int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu vcpu, struct kvm_fpu fpu)
				2974	{
				2975	int ret = 0;
				2976
				2977	vcpu_load(vcpu);
				2978
				2979	if (test_fp_ctl(fpu->fpc)) {
				2980	ret = -EINVAL;
				2981	goto out;
				2982	}
				2983	vcpu->run->s.regs.fpc = fpu->fpc;
				2984	if (MACHINE_HAS_VX)
				2985	convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				2986	(freg_t *) fpu->fprs);
				2987	else
				2988	memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
				2989
				2990	out:
				2991	vcpu_put(vcpu);
				2992	return ret;
				2993	}
				2994
				2995	int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu vcpu, struct kvm_fpu fpu)
				2996	{
				2997	vcpu_load(vcpu);
				2998
				2999	/* make sure we have the latest values */
				3000	save_fpu_regs();
				3001	if (MACHINE_HAS_VX)
				3002	convert_vx_to_fp((freg_t *) fpu->fprs,
				3003	(__vector128 *) vcpu->run->s.regs.vrs);
				3004	else
				3005	memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
				3006	fpu->fpc = vcpu->run->s.regs.fpc;
				3007
				3008	vcpu_put(vcpu);
				3009	return 0;
				3010	}
				3011
				3012	static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
				3013	{
				3014	int rc = 0;
				3015
				3016	if (!is_vcpu_stopped(vcpu))
				3017	rc = -EBUSY;
				3018	else {
				3019	vcpu->run->psw_mask = psw.mask;
				3020	vcpu->run->psw_addr = psw.addr;
				3021	}
				3022	return rc;
				3023	}
				3024
				3025	int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				3026	struct kvm_translation *tr)
				3027	{
				3028	return -EINVAL; /* not implemented yet */
				3029	}
				3030
				/* Debug control flags accepted by kvm_arch_vcpu_ioctl_set_guest_debug(). */
				#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
							      KVM_GUESTDBG_USE_HW_BP | \
							      KVM_GUESTDBG_ENABLE)
				3034
				3035	int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
				3036	struct kvm_guest_debug *dbg)
				3037	{
				3038	int rc = 0;
				3039
				3040	vcpu_load(vcpu);
				3041
				3042	vcpu->guest_debug = 0;
				3043	kvm_s390_clear_bp_data(vcpu);
				3044
				3045	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
				3046	rc = -EINVAL;
				3047	goto out;
				3048	}
				3049	if (!sclp.has_gpere) {
				3050	rc = -EINVAL;
				3051	goto out;
				3052	}
				3053
				3054	if (dbg->control & KVM_GUESTDBG_ENABLE) {
				3055	vcpu->guest_debug = dbg->control;
				3056	/* enforce guest PER */
				3057	kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
				3058
				3059	if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
				3060	rc = kvm_s390_import_bp_data(vcpu, dbg);
				3061	} else {
				3062	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
				3063	vcpu->arch.guestdbg.last_bp = 0;
				3064	}
				3065
				3066	if (rc) {
				3067	vcpu->guest_debug = 0;
				3068	kvm_s390_clear_bp_data(vcpu);
				3069	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
				3070	}
				3071
				3072	out:
				3073	vcpu_put(vcpu);
				3074	return rc;
				3075	}
				3076
				3077	int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				3078	struct kvm_mp_state *mp_state)
				3079	{
				3080	int ret;
				3081
				3082	vcpu_load(vcpu);
				3083
				3084	/* CHECK_STOP and LOAD are not supported yet */
				3085	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				3086	KVM_MP_STATE_OPERATING;
				3087
				3088	vcpu_put(vcpu);
				3089	return ret;
				3090	}
				3091
				3092	int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				3093	struct kvm_mp_state *mp_state)
				3094	{
				3095	int rc = 0;
				3096
				3097	vcpu_load(vcpu);
				3098
				3099	/* user space knows about this interface - let it control the state */
				3100	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
				3101
				3102	switch (mp_state->mp_state) {
				3103	case KVM_MP_STATE_STOPPED:
				3104	kvm_s390_vcpu_stop(vcpu);
				3105	break;
				3106	case KVM_MP_STATE_OPERATING:
				3107	kvm_s390_vcpu_start(vcpu);
				3108	break;
				3109	case KVM_MP_STATE_LOAD:
				3110	case KVM_MP_STATE_CHECK_STOP:
				3111	/* fall through - CHECK_STOP and LOAD are not supported yet */
				3112	default:
				3113	rc = -ENXIO;
				3114	}
				3115
				3116	vcpu_put(vcpu);
				3117	return rc;
				3118	}
				3119
				3120	static bool ibs_enabled(struct kvm_vcpu *vcpu)
				3121	{
				3122	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
				3123	}
				3124
				3125	static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
				3126	{
				3127	retry:
				3128	kvm_s390_vcpu_request_handled(vcpu);
				3129	if (!kvm_request_pending(vcpu))
				3130	return 0;
				3131	/*
				3132	* We use MMU_RELOAD just to re-arm the ipte notifier for the
				3133	* guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
				3134	* This ensures that the ipte instruction for this request has
				3135	* already finished. We might race against a second unmapper that
				3136	* wants to set the blocking bit. Lets just retry the request loop.
				3137	*/
				3138	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
				3139	int rc;
				3140	rc = gmap_mprotect_notify(vcpu->arch.gmap,
				3141	kvm_s390_get_prefix(vcpu),
				3142	PAGE_SIZE * 2, PROT_WRITE);
				3143	if (rc) {
				3144	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
				3145	return rc;
				3146	}
				3147	goto retry;
				3148	}
				3149
				3150	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
				3151	vcpu->arch.sie_block->ihcpu = 0xffff;
				3152	goto retry;
				3153	}
				3154
				3155	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
				3156	if (!ibs_enabled(vcpu)) {
				3157	trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
				3158	kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
				3159	}
				3160	goto retry;
				3161	}
				3162
				3163	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
				3164	if (ibs_enabled(vcpu)) {
				3165	trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
				3166	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
				3167	}
				3168	goto retry;
				3169	}
				3170
				3171	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
				3172	vcpu->arch.sie_block->ictl \|= ICTL_OPEREXC;
				3173	goto retry;
				3174	}
				3175
				3176	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
				3177	/*
				3178	* Disable CMM virtualization; we will emulate the ESSA
				3179	* instruction manually, in order to provide additional
				3180	* functionalities needed for live migration.
				3181	*/
				3182	vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
				3183	goto retry;
				3184	}
				3185
				3186	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
				3187	/*
				3188	* Re-enable CMM virtualization if CMMA is available and
				3189	* CMM has been used.
				3190	*/
				3191	if ((vcpu->kvm->arch.use_cmma) &&
				3192	(vcpu->kvm->mm->context.uses_cmm))
				3193	vcpu->arch.sie_block->ecb2 \|= ECB2_CMMA;
				3194	goto retry;
				3195	}
				3196
				3197	/* nothing to do, just clear the request */
				3198	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
				3199
				3200	return 0;
				3201	}
				3202
				3203	void kvm_s390_set_tod_clock(struct kvm *kvm,
				3204	const struct kvm_s390_vm_tod_clock *gtod)
				3205	{
				3206	struct kvm_vcpu *vcpu;
				3207	struct kvm_s390_tod_clock_ext htod;
				3208	int i;
				3209
				3210	mutex_lock(&kvm->lock);
				3211	preempt_disable();
				3212
				3213	get_tod_clock_ext((char *)&htod);
				3214
				3215	kvm->arch.epoch = gtod->tod - htod.tod;
				3216	kvm->arch.epdx = 0;
				3217	if (test_kvm_facility(kvm, 139)) {
				3218	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
				3219	if (kvm->arch.epoch > gtod->tod)
				3220	kvm->arch.epdx -= 1;
				3221	}
				3222
				3223	kvm_s390_vcpu_block_all(kvm);
				3224	kvm_for_each_vcpu(i, vcpu, kvm) {
				3225	vcpu->arch.sie_block->epoch = kvm->arch.epoch;
				3226	vcpu->arch.sie_block->epdx = kvm->arch.epdx;
				3227	}
				3228
				3229	kvm_s390_vcpu_unblock_all(kvm);
				3230	preempt_enable();
				3231	mutex_unlock(&kvm->lock);
				3232	}
				3233
				3234	/**
				3235	* kvm_arch_fault_in_page - fault-in guest page if necessary
				3236	* @vcpu: The corresponding virtual cpu
				3237	* @gpa: Guest physical address
				3238	* @writable: Whether the page should be writable or not
				3239	*
				3240	* Make sure that a guest page has been faulted-in on the host.
				3241	*
				3242	* Return: Zero on success, negative error code otherwise.
				3243	*/
				3244	long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
				3245	{
				3246	return gmap_fault(vcpu->arch.gmap, gpa,
				3247	writable ? FAULT_FLAG_WRITE : 0);
				3248	}
				3249
				3250	static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				3251	unsigned long token)
				3252	{
				3253	struct kvm_s390_interrupt inti;
				3254	struct kvm_s390_irq irq;
				3255
				3256	if (start_token) {
				3257	irq.u.ext.ext_params2 = token;
				3258	irq.type = KVM_S390_INT_PFAULT_INIT;
				3259	WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
				3260	} else {
				3261	inti.type = KVM_S390_INT_PFAULT_DONE;
				3262	inti.parm64 = token;
				3263	WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
				3264	}
				3265	}
				3266
				3267	void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				3268	struct kvm_async_pf *work)
				3269	{
				3270	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
				3271	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
				3272	}
				3273
				3274	void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				3275	struct kvm_async_pf *work)
				3276	{
				3277	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
				3278	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
				3279	}
				3280
				/*
				 * Intentionally empty: s390 will always inject the page directly, so
				 * there is nothing to do here.
				 */
				void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
							       struct kvm_async_pf *work)
				{
				}
				3286
				3287	bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
				3288	{
				3289	/*
				3290	* s390 will always inject the page directly,
				3291	* but we still want check_async_completion to cleanup
				3292	*/
				3293	return true;
				3294	}
				3295
				3296	static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
				3297	{
				3298	hva_t hva;
				3299	struct kvm_arch_async_pf arch;
				3300	int rc;
				3301
				3302	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
				3303	return 0;
				3304	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
				3305	vcpu->arch.pfault_compare)
				3306	return 0;
				3307	if (psw_extint_disabled(vcpu))
				3308	return 0;
				3309	if (kvm_s390_vcpu_has_irq(vcpu, 0))
				3310	return 0;
				3311	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
				3312	return 0;
				3313	if (!vcpu->arch.gmap->pfault_enabled)
				3314	return 0;
				3315
				3316	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
				3317	hva += current->thread.gmap_addr & ~PAGE_MASK;
				3318	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
				3319	return 0;
				3320
				3321	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
				3322	return rc;
				3323	}
				3324
				3325	static int vcpu_pre_run(struct kvm_vcpu *vcpu)
				3326	{
				3327	int rc, cpuflags;
				3328
				3329	/*
				3330	* On s390 notifications for arriving pages will be delivered directly
				3331	* to the guest but the house keeping for completed pfaults is
				3332	* handled outside the worker.
				3333	*/
				3334	kvm_check_async_pf_completion(vcpu);
				3335
				3336	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
				3337	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
				3338
				3339	if (need_resched())
				3340	schedule();
				3341
				3342	if (test_cpu_flag(CIF_MCCK_PENDING))
				3343	s390_handle_mcck();
				3344
				3345	if (!kvm_is_ucontrol(vcpu->kvm)) {
				3346	rc = kvm_s390_deliver_pending_interrupts(vcpu);
				3347	if (rc)
				3348	return rc;
				3349	}
				3350
				3351	rc = kvm_s390_handle_requests(vcpu);
				3352	if (rc)
				3353	return rc;
				3354
				3355	if (guestdbg_enabled(vcpu)) {
				3356	kvm_s390_backup_guest_per_regs(vcpu);
				3357	kvm_s390_patch_guest_per_regs(vcpu);
				3358	}
				3359
				3360	vcpu->arch.sie_block->icptcode = 0;
				3361	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
				3362	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
				3363	trace_kvm_s390_sie_enter(vcpu, cpuflags);
				3364
				3365	return 0;
				3366	}
				3367
				3368	static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
				3369	{
				3370	struct kvm_s390_pgm_info pgm_info = {
				3371	.code = PGM_ADDRESSING,
				3372	};
				3373	u8 opcode, ilen;
				3374	int rc;
				3375
				3376	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
				3377	trace_kvm_s390_sie_fault(vcpu);
				3378
				3379	/*
				3380	* We want to inject an addressing exception, which is defined as a
				3381	* suppressing or terminating exception. However, since we came here
				3382	* by a DAT access exception, the PSW still points to the faulting
				3383	* instruction since DAT exceptions are nullifying. So we've got
				3384	* to look up the current opcode to get the length of the instruction
				3385	* to be able to forward the PSW.
				3386	*/
				3387	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
				3388	ilen = insn_length(opcode);
				3389	if (rc < 0) {
				3390	return rc;
				3391	} else if (rc) {
				3392	/* Instruction-Fetching Exceptions - we can't detect the ilen.
				3393	* Forward by arbitrary ilc, injection will take care of
				3394	* nullification if necessary.
				3395	*/
				3396	pgm_info = vcpu->arch.pgm;
				3397	ilen = 4;
				3398	}
				3399	pgm_info.flags = ilen \| KVM_S390_PGM_FLAGS_ILC_VALID;
				3400	kvm_s390_forward_psw(vcpu, ilen);
				3401	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
				3402	}
				3403
				3404	static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
				3405	{
				3406	struct mcck_volatile_info *mcck_info;
				3407	struct sie_page *sie_page;
				3408
				3409	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
				3410	vcpu->arch.sie_block->icptcode);
				3411	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
				3412
				3413	if (guestdbg_enabled(vcpu))
				3414	kvm_s390_restore_guest_per_regs(vcpu);
				3415
				3416	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
				3417	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
				3418
				3419	if (exit_reason == -EINTR) {
				3420	VCPU_EVENT(vcpu, 3, "%s", "machine check");
				3421	sie_page = container_of(vcpu->arch.sie_block,
				3422	struct sie_page, sie_block);
				3423	mcck_info = &sie_page->mcck_info;
				3424	kvm_s390_reinject_machine_check(vcpu, mcck_info);
				3425	return 0;
				3426	}
				3427
				3428	if (vcpu->arch.sie_block->icptcode > 0) {
				3429	int rc = kvm_handle_sie_intercept(vcpu);
				3430
				3431	if (rc != -EOPNOTSUPP)
				3432	return rc;
				3433	vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
				3434	vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
				3435	vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
				3436	vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
				3437	return -EREMOTE;
				3438	} else if (exit_reason != -EFAULT) {
				3439	vcpu->stat.exit_null++;
				3440	return 0;
				3441	} else if (kvm_is_ucontrol(vcpu->kvm)) {
				3442	vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
				3443	vcpu->run->s390_ucontrol.trans_exc_code =
				3444	current->thread.gmap_addr;
				3445	vcpu->run->s390_ucontrol.pgm_code = 0x10;
				3446	return -EREMOTE;
				3447	} else if (current->thread.gmap_pfault) {
				3448	trace_kvm_s390_major_guest_pfault(vcpu);
				3449	current->thread.gmap_pfault = 0;
				3450	if (kvm_arch_setup_async_pf(vcpu))
				3451	return 0;
				3452	return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
				3453	}
				3454	return vcpu_post_run_fault_in_sie(vcpu);
				3455	}
				3456
				3457	static int __vcpu_run(struct kvm_vcpu *vcpu)
				3458	{
				3459	int rc, exit_reason;
				3460
				3461	/*
				3462	* We try to hold kvm->srcu during most of vcpu_run (except when run-
				3463	* ning the guest), so that memslots (and other stuff) are protected
				3464	*/
				3465	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
				3466
				3467	do {
				3468	rc = vcpu_pre_run(vcpu);
				3469	if (rc)
				3470	break;
				3471
				3472	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
				3473	/*
				3474	* As PF_VCPU will be used in fault handler, between
				3475	* guest_enter and guest_exit should be no uaccess.
				3476	*/
				3477	local_irq_disable();
				3478	guest_enter_irqoff();
				3479	__disable_cpu_timer_accounting(vcpu);
				3480	local_irq_enable();
				3481	exit_reason = sie64a(vcpu->arch.sie_block,
				3482	vcpu->run->s.regs.gprs);
				3483	local_irq_disable();
				3484	__enable_cpu_timer_accounting(vcpu);
				3485	guest_exit_irqoff();
				3486	local_irq_enable();
				3487	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
				3488
				3489	rc = vcpu_post_run(vcpu, exit_reason);
				3490	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
				3491
				3492	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
				3493	return rc;
				3494	}
				3495
				3496	static void sync_regs(struct kvm_vcpu vcpu, struct kvm_run kvm_run)
				3497	{
				3498	struct runtime_instr_cb *riccb;
				3499	struct gs_cb *gscb;
				3500
				3501	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
				3502	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
				3503	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
				3504	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
				3505	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
				3506	kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
				3507	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
				3508	memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
				3509	/* some control register changes require a tlb flush */
				3510	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
				3511	}
				3512	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
				3513	kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
				3514	vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
				3515	vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
				3516	vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
				3517	vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
				3518	}
				3519	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
				3520	vcpu->arch.pfault_token = kvm_run->s.regs.pft;
				3521	vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
				3522	vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
				3523	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
				3524	kvm_clear_async_pf_completion_queue(vcpu);
				3525	}
				3526	/*
				3527	* If userspace sets the riccb (e.g. after migration) to a valid state,
				3528	* we should enable RI here instead of doing the lazy enablement.
				3529	*/
				3530	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
				3531	test_kvm_facility(vcpu->kvm, 64) &&
				3532	riccb->v &&
				3533	!(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
				3534	VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
				3535	vcpu->arch.sie_block->ecb3 \|= ECB3_RI;
				3536	}
				3537	/*
				3538	* If userspace sets the gscb (e.g. after migration) to non-zero,
				3539	* we should enable GS here instead of doing the lazy enablement.
				3540	*/
				3541	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
				3542	test_kvm_facility(vcpu->kvm, 133) &&
				3543	gscb->gssm &&
				3544	!vcpu->arch.gs_enabled) {
				3545	VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
				3546	vcpu->arch.sie_block->ecb \|= ECB_GS;
				3547	vcpu->arch.sie_block->ecd \|= ECD_HOSTREGMGMT;
				3548	vcpu->arch.gs_enabled = 1;
				3549	}
				3550	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
				3551	test_kvm_facility(vcpu->kvm, 82)) {
				3552	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
				3553	vcpu->arch.sie_block->fpf \|= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
				3554	}
				3555	save_access_regs(vcpu->arch.host_acrs);
				3556	restore_access_regs(vcpu->run->s.regs.acrs);
				3557	/* save host (userspace) fprs/vrs */
				3558	save_fpu_regs();
				3559	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
				3560	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
				3561	if (MACHINE_HAS_VX)
				3562	current->thread.fpu.regs = vcpu->run->s.regs.vrs;
				3563	else
				3564	current->thread.fpu.regs = vcpu->run->s.regs.fprs;
				3565	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
				3566	if (test_fp_ctl(current->thread.fpu.fpc))
				3567	/* User space provided an invalid FPC, let's clear it */
				3568	current->thread.fpu.fpc = 0;
				3569	if (MACHINE_HAS_GS) {
				3570	preempt_disable();
				3571	__ctl_set_bit(2, 4);
				3572	if (current->thread.gs_cb) {
				3573	vcpu->arch.host_gscb = current->thread.gs_cb;
				3574	save_gs_cb(vcpu->arch.host_gscb);
				3575	}
				3576	if (vcpu->arch.gs_enabled) {
				3577	current->thread.gs_cb = (struct gs_cb *)
				3578	&vcpu->run->s.regs.gscb;
				3579	restore_gs_cb(current->thread.gs_cb);
				3580	}
				3581	preempt_enable();
				3582	}
				3583	/* SIE will load etoken directly from SDNX and therefore kvm_run */
				3584
				3585	kvm_run->kvm_dirty_regs = 0;
				3586	}
				3587
				3588	static void store_regs(struct kvm_vcpu vcpu, struct kvm_run kvm_run)
				3589	{
				3590	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
				3591	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
				3592	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
				3593	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
				3594	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
				3595	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
				3596	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
				3597	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
				3598	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
				3599	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
				3600	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
				3601	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
				3602	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
				3603	save_access_regs(vcpu->run->s.regs.acrs);
				3604	restore_access_regs(vcpu->arch.host_acrs);
				3605	/* Save guest register state */
				3606	save_fpu_regs();
				3607	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
				3608	/* Restore will be done lazily at return */
				3609	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
				3610	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
				3611	if (MACHINE_HAS_GS) {
				3612	__ctl_set_bit(2, 4);
				3613	if (vcpu->arch.gs_enabled)
				3614	save_gs_cb(current->thread.gs_cb);
				3615	preempt_disable();
				3616	current->thread.gs_cb = vcpu->arch.host_gscb;
				3617	restore_gs_cb(vcpu->arch.host_gscb);
				3618	preempt_enable();
				3619	if (!vcpu->arch.host_gscb)
				3620	__ctl_clear_bit(2, 4);
				3621	vcpu->arch.host_gscb = NULL;
				3622	}
				3623	/* SIE will save etoken directly into SDNX and therefore kvm_run */
				3624	}
				3625
				3626	int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu vcpu, struct kvm_run kvm_run)
				3627	{
				3628	int rc;
				3629
				3630	if (kvm_run->immediate_exit)
				3631	return -EINTR;
				3632
				3633	vcpu_load(vcpu);
				3634
				3635	if (guestdbg_exit_pending(vcpu)) {
				3636	kvm_s390_prepare_debug_exit(vcpu);
				3637	rc = 0;
				3638	goto out;
				3639	}
				3640
				3641	kvm_sigset_activate(vcpu);
				3642
				3643	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
				3644	kvm_s390_vcpu_start(vcpu);
				3645	} else if (is_vcpu_stopped(vcpu)) {
				3646	pr_err_ratelimited("can't run stopped vcpu %d\n",
				3647	vcpu->vcpu_id);
				3648	rc = -EINVAL;
				3649	goto out;
				3650	}
				3651
				3652	sync_regs(vcpu, kvm_run);
				3653	enable_cpu_timer_accounting(vcpu);
				3654
				3655	might_fault();
				3656	rc = __vcpu_run(vcpu);
				3657
				3658	if (signal_pending(current) && !rc) {
				3659	kvm_run->exit_reason = KVM_EXIT_INTR;
				3660	rc = -EINTR;
				3661	}
				3662
				3663	if (guestdbg_exit_pending(vcpu) && !rc) {
				3664	kvm_s390_prepare_debug_exit(vcpu);
				3665	rc = 0;
				3666	}
				3667
				3668	if (rc == -EREMOTE) {
				3669	/* userspace support is needed, kvm_run has been prepared */
				3670	rc = 0;
				3671	}
				3672
				3673	disable_cpu_timer_accounting(vcpu);
				3674	store_regs(vcpu, kvm_run);
				3675
				3676	kvm_sigset_deactivate(vcpu);
				3677
				3678	vcpu->stat.exit_userspace++;
				3679	out:
				3680	vcpu_put(vcpu);
				3681	return rc;
				3682	}
				3683
				3684	/*
				3685	* store status at address
				3686	* we use have two special cases:
				3687	* KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
				3688	* KVM_S390_STORE_STATUS_PREFIXED: -> prefix
				3689	*/
				3690	int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
				3691	{
				3692	unsigned char archmode = 1;
				3693	freg_t fprs[NUM_FPRS];
				3694	unsigned int px;
				3695	u64 clkcomp, cputm;
				3696	int rc;
				3697
				3698	px = kvm_s390_get_prefix(vcpu);
				3699	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
				3700	if (write_guest_abs(vcpu, 163, &archmode, 1))
				3701	return -EFAULT;
				3702	gpa = 0;
				3703	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
				3704	if (write_guest_real(vcpu, 163, &archmode, 1))
				3705	return -EFAULT;
				3706	gpa = px;
				3707	} else
				3708	gpa -= __LC_FPREGS_SAVE_AREA;
				3709
				3710	/* manually convert vector registers if necessary */
				3711	if (MACHINE_HAS_VX) {
				3712	convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
				3713	rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				3714	fprs, 128);
				3715	} else {
				3716	rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				3717	vcpu->run->s.regs.fprs, 128);
				3718	}
				3719	rc \|= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
				3720	vcpu->run->s.regs.gprs, 128);
				3721	rc \|= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
				3722	&vcpu->arch.sie_block->gpsw, 16);
				3723	rc \|= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
				3724	&px, 4);
				3725	rc \|= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
				3726	&vcpu->run->s.regs.fpc, 4);
				3727	rc \|= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
				3728	&vcpu->arch.sie_block->todpr, 4);
				3729	cputm = kvm_s390_get_cpu_timer(vcpu);
				3730	rc \|= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
				3731	&cputm, 8);
				3732	clkcomp = vcpu->arch.sie_block->ckc >> 8;
				3733	rc \|= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
				3734	&clkcomp, 8);
				3735	rc \|= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
				3736	&vcpu->run->s.regs.acrs, 64);
				3737	rc \|= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
				3738	&vcpu->arch.sie_block->gcr, 128);
				3739	return rc ? -EFAULT : 0;
				3740	}
				3741
				3742	int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
				3743	{
				3744	/*
				3745	* The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
				3746	* switch in the run ioctl. Let's update our copies before we save
				3747	* it into the save area
				3748	*/
				3749	save_fpu_regs();
				3750	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
				3751	save_access_regs(vcpu->run->s.regs.acrs);
				3752
				3753	return kvm_s390_store_status_unloaded(vcpu, addr);
				3754	}
				3755
				3756	static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
				3757	{
				3758	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
				3759	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
				3760	}
				3761
				3762	static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
				3763	{
				3764	unsigned int i;
				3765	struct kvm_vcpu *vcpu;
				3766
				3767	kvm_for_each_vcpu(i, vcpu, kvm) {
				3768	__disable_ibs_on_vcpu(vcpu);
				3769	}
				3770	}
				3771
				3772	static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
				3773	{
				3774	if (!sclp.has_ibs)
				3775	return;
				3776	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
				3777	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
				3778	}
				3779
				3780	void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
				3781	{
				3782	int i, online_vcpus, started_vcpus = 0;
				3783
				3784	if (!is_vcpu_stopped(vcpu))
				3785	return;
				3786
				3787	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
				3788	/* Only one cpu at a time may enter/leave the STOPPED state. */
				3789	spin_lock(&vcpu->kvm->arch.start_stop_lock);
				3790	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
				3791
				3792	for (i = 0; i < online_vcpus; i++) {
				3793	if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
				3794	started_vcpus++;
				3795	}
				3796
				3797	if (started_vcpus == 0) {
				3798	/* we're the only active VCPU -> speed it up */
				3799	__enable_ibs_on_vcpu(vcpu);
				3800	} else if (started_vcpus == 1) {
				3801	/*
				3802	* As we are starting a second VCPU, we have to disable
				3803	* the IBS facility on all VCPUs to remove potentially
				3804	* oustanding ENABLE requests.
				3805	*/
				3806	__disable_ibs_on_all_vcpus(vcpu->kvm);
				3807	}
				3808
				3809	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
				3810	/*
				3811	* Another VCPU might have used IBS while we were offline.
				3812	* Let's play safe and flush the VCPU at startup.
				3813	*/
				3814	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
				3815	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
				3816	return;
				3817	}
				3818
				3819	void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
				3820	{
				3821	int i, online_vcpus, started_vcpus = 0;
				3822	struct kvm_vcpu *started_vcpu = NULL;
				3823
				3824	if (is_vcpu_stopped(vcpu))
				3825	return;
				3826
				3827	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
				3828	/* Only one cpu at a time may enter/leave the STOPPED state. */
				3829	spin_lock(&vcpu->kvm->arch.start_stop_lock);
				3830	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
				3831
				3832	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
				3833	kvm_s390_clear_stop_irq(vcpu);
				3834
				3835	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
				3836	__disable_ibs_on_vcpu(vcpu);
				3837
				3838	for (i = 0; i < online_vcpus; i++) {
				3839	if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
				3840	started_vcpus++;
				3841	started_vcpu = vcpu->kvm->vcpus[i];
				3842	}
				3843	}
				3844
				3845	if (started_vcpus == 1) {
				3846	/*
				3847	* As we only have one VCPU left, we want to enable the
				3848	* IBS facility for that VCPU to speed it up.
				3849	*/
				3850	__enable_ibs_on_vcpu(started_vcpu);
				3851	}
				3852
				3853	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
				3854	return;
				3855	}
				3856
				3857	static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				3858	struct kvm_enable_cap *cap)
				3859	{
				3860	int r;
				3861
				3862	if (cap->flags)
				3863	return -EINVAL;
				3864
				3865	switch (cap->cap) {
				3866	case KVM_CAP_S390_CSS_SUPPORT:
				3867	if (!vcpu->kvm->arch.css_support) {
				3868	vcpu->kvm->arch.css_support = 1;
				3869	VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
				3870	trace_kvm_s390_enable_css(vcpu->kvm);
				3871	}
				3872	r = 0;
				3873	break;
				3874	default:
				3875	r = -EINVAL;
				3876	break;
				3877	}
				3878	return r;
				3879	}
				3880
				3881	static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				3882	struct kvm_s390_mem_op *mop)
				3883	{
				3884	void __user uaddr = (void __user )mop->buf;
				3885	void *tmpbuf = NULL;
				3886	int r, srcu_idx;
				3887	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				3888	\| KVM_S390_MEMOP_F_CHECK_ONLY;
				3889
				3890	if (mop->flags & ~supported_flags)
				3891	return -EINVAL;
				3892
				3893	if (mop->size > MEM_OP_MAX_SIZE)
				3894	return -E2BIG;
				3895
				3896	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
				3897	tmpbuf = vmalloc(mop->size);
				3898	if (!tmpbuf)
				3899	return -ENOMEM;
				3900	}
				3901
				3902	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
				3903
				3904	switch (mop->op) {
				3905	case KVM_S390_MEMOP_LOGICAL_READ:
				3906	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
				3907	r = check_gva_range(vcpu, mop->gaddr, mop->ar,
				3908	mop->size, GACC_FETCH);
				3909	break;
				3910	}
				3911	r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
				3912	if (r == 0) {
				3913	if (copy_to_user(uaddr, tmpbuf, mop->size))
				3914	r = -EFAULT;
				3915	}
				3916	break;
				3917	case KVM_S390_MEMOP_LOGICAL_WRITE:
				3918	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
				3919	r = check_gva_range(vcpu, mop->gaddr, mop->ar,
				3920	mop->size, GACC_STORE);
				3921	break;
				3922	}
				3923	if (copy_from_user(tmpbuf, uaddr, mop->size)) {
				3924	r = -EFAULT;
				3925	break;
				3926	}
				3927	r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
				3928	break;
				3929	default:
				3930	r = -EINVAL;
				3931	}
				3932
				3933	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
				3934
				3935	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
				3936	kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
				3937
				3938	vfree(tmpbuf);
				3939	return r;
				3940	}
				3941
				3942	long kvm_arch_vcpu_async_ioctl(struct file *filp,
				3943	unsigned int ioctl, unsigned long arg)
				3944	{
				3945	struct kvm_vcpu *vcpu = filp->private_data;
				3946	void __user argp = (void __user )arg;
				3947
				3948	switch (ioctl) {
				3949	case KVM_S390_IRQ: {
				3950	struct kvm_s390_irq s390irq;
				3951
				3952	if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
				3953	return -EFAULT;
				3954	return kvm_s390_inject_vcpu(vcpu, &s390irq);
				3955	}
				3956	case KVM_S390_INTERRUPT: {
				3957	struct kvm_s390_interrupt s390int;
				3958	struct kvm_s390_irq s390irq;
				3959
				3960	if (copy_from_user(&s390int, argp, sizeof(s390int)))
				3961	return -EFAULT;
				3962	if (s390int_to_s390irq(&s390int, &s390irq))
				3963	return -EINVAL;
				3964	return kvm_s390_inject_vcpu(vcpu, &s390irq);
				3965	}
				3966	}
				3967	return -ENOIOCTLCMD;
				3968	}
				3969
				3970	long kvm_arch_vcpu_ioctl(struct file *filp,
				3971	unsigned int ioctl, unsigned long arg)
				3972	{
				3973	struct kvm_vcpu *vcpu = filp->private_data;
				3974	void __user argp = (void __user )arg;
				3975	int idx;
				3976	long r;
				3977
				3978	vcpu_load(vcpu);
				3979
				3980	switch (ioctl) {
				3981	case KVM_S390_STORE_STATUS:
				3982	idx = srcu_read_lock(&vcpu->kvm->srcu);
				3983	r = kvm_s390_vcpu_store_status(vcpu, arg);
				3984	srcu_read_unlock(&vcpu->kvm->srcu, idx);
				3985	break;
				3986	case KVM_S390_SET_INITIAL_PSW: {
				3987	psw_t psw;
				3988
				3989	r = -EFAULT;
				3990	if (copy_from_user(&psw, argp, sizeof(psw)))
				3991	break;
				3992	r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
				3993	break;
				3994	}
				3995	case KVM_S390_INITIAL_RESET:
				3996	r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
				3997	break;
				3998	case KVM_SET_ONE_REG:
				3999	case KVM_GET_ONE_REG: {
				4000	struct kvm_one_reg reg;
				4001	r = -EFAULT;
				4002	if (copy_from_user(&reg, argp, sizeof(reg)))
				4003	break;
				4004	if (ioctl == KVM_SET_ONE_REG)
				4005	r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
				4006	else
				4007	r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
				4008	break;
				4009	}
				4010	#ifdef CONFIG_KVM_S390_UCONTROL
				4011	case KVM_S390_UCAS_MAP: {
				4012	struct kvm_s390_ucas_mapping ucasmap;
				4013
				4014	if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
				4015	r = -EFAULT;
				4016	break;
				4017	}
				4018
				4019	if (!kvm_is_ucontrol(vcpu->kvm)) {
				4020	r = -EINVAL;
				4021	break;
				4022	}
				4023
				4024	r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				4025	ucasmap.vcpu_addr, ucasmap.length);
				4026	break;
				4027	}
				4028	case KVM_S390_UCAS_UNMAP: {
				4029	struct kvm_s390_ucas_mapping ucasmap;
				4030
				4031	if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
				4032	r = -EFAULT;
				4033	break;
				4034	}
				4035
				4036	if (!kvm_is_ucontrol(vcpu->kvm)) {
				4037	r = -EINVAL;
				4038	break;
				4039	}
				4040
				4041	r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				4042	ucasmap.length);
				4043	break;
				4044	}
				4045	#endif
				4046	case KVM_S390_VCPU_FAULT: {
				4047	r = gmap_fault(vcpu->arch.gmap, arg, 0);
				4048	break;
				4049	}
				4050	case KVM_ENABLE_CAP:
				4051	{
				4052	struct kvm_enable_cap cap;
				4053	r = -EFAULT;
				4054	if (copy_from_user(&cap, argp, sizeof(cap)))
				4055	break;
				4056	r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
				4057	break;
				4058	}
				4059	case KVM_S390_MEM_OP: {
				4060	struct kvm_s390_mem_op mem_op;
				4061
				4062	if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
				4063	r = kvm_s390_guest_mem_op(vcpu, &mem_op);
				4064	else
				4065	r = -EFAULT;
				4066	break;
				4067	}
				4068	case KVM_S390_SET_IRQ_STATE: {
				4069	struct kvm_s390_irq_state irq_state;
				4070
				4071	r = -EFAULT;
				4072	if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
				4073	break;
				4074	if (irq_state.len > VCPU_IRQS_MAX_BUF \|\|
				4075	irq_state.len == 0 \|\|
				4076	irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
				4077	r = -EINVAL;
				4078	break;
				4079	}
				4080	/* do not use irq_state.flags, it will break old QEMUs */
				4081	r = kvm_s390_set_irq_state(vcpu,
				4082	(void __user *) irq_state.buf,
				4083	irq_state.len);
				4084	break;
				4085	}
				4086	case KVM_S390_GET_IRQ_STATE: {
				4087	struct kvm_s390_irq_state irq_state;
				4088
				4089	r = -EFAULT;
				4090	if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
				4091	break;
				4092	if (irq_state.len == 0) {
				4093	r = -EINVAL;
				4094	break;
				4095	}
				4096	/* do not use irq_state.flags, it will break old QEMUs */
				4097	r = kvm_s390_get_irq_state(vcpu,
				4098	(__u8 __user *) irq_state.buf,
				4099	irq_state.len);
				4100	break;
				4101	}
				4102	default:
				4103	r = -ENOTTY;
				4104	}
				4105
				4106	vcpu_put(vcpu);
				4107	return r;
				4108	}
				4109
				4110	vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu vcpu, struct vm_fault vmf)
				4111	{
				4112	#ifdef CONFIG_KVM_S390_UCONTROL
				4113	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
				4114	&& (kvm_is_ucontrol(vcpu->kvm))) {
				4115	vmf->page = virt_to_page(vcpu->arch.sie_block);
				4116	get_page(vmf->page);
				4117	return 0;
				4118	}
				4119	#endif
				4120	return VM_FAULT_SIGBUS;
				4121	}
				4122
				4123	int kvm_arch_create_memslot(struct kvm kvm, struct kvm_memory_slot slot,
				4124	unsigned long npages)
				4125	{
				4126	return 0;
				4127	}
				4128
				4129	/* Section: memory related */
				4130	int kvm_arch_prepare_memory_region(struct kvm *kvm,
				4131	struct kvm_memory_slot *memslot,
				4132	const struct kvm_userspace_memory_region *mem,
				4133	enum kvm_mr_change change)
				4134	{
				4135	/* A few sanity checks. We can have memory slots which have to be
				4136	located/ended at a segment boundary (1MB). The memory in userland is
				4137	ok to be fragmented into various different vmas. It is okay to mmap()
				4138	and munmap() stuff in this slot after doing this call at any time */
				4139
				4140	if (mem->userspace_addr & 0xffffful)
				4141	return -EINVAL;
				4142
				4143	if (mem->memory_size & 0xffffful)
				4144	return -EINVAL;
				4145
				4146	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
				4147	return -EINVAL;
				4148
				4149	return 0;
				4150	}
				4151
				4152	void kvm_arch_commit_memory_region(struct kvm *kvm,
				4153	const struct kvm_userspace_memory_region *mem,
				4154	const struct kvm_memory_slot *old,
				4155	const struct kvm_memory_slot *new,
				4156	enum kvm_mr_change change)
				4157	{
				4158	int rc;
				4159
				4160	/* If the basics of the memslot do not change, we do not want
				4161	* to update the gmap. Every update causes several unnecessary
				4162	* segment translation exceptions. This is usually handled just
				4163	* fine by the normal fault handler + gmap, but it will also
				4164	* cause faults on the prefix page of running guest CPUs.
				4165	*/
				4166	if (old->userspace_addr == mem->userspace_addr &&
				4167	old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
				4168	old->npages * PAGE_SIZE == mem->memory_size)
				4169	return;
				4170
				4171	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				4172	mem->guest_phys_addr, mem->memory_size);
				4173	if (rc)
				4174	pr_warn("failed to commit memory region\n");
				4175	return;
				4176	}
				4177
				4178	static inline unsigned long nonhyp_mask(int i)
				4179	{
				4180	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
				4181
				4182	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
				4183	}
				4184
				4185	void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
				4186	{
				4187	vcpu->valid_wakeup = false;
				4188	}
				4189
				4190	static int __init kvm_s390_init(void)
				4191	{
				4192	int i;
				4193
				4194	if (!sclp.has_sief2) {
				4195	pr_info("SIE not available\n");
				4196	return -ENODEV;
				4197	}
				4198
				4199	if (nested && hpage) {
				4200	pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
				4201	return -EINVAL;
				4202	}
				4203
				4204	for (i = 0; i < 16; i++)
				4205	kvm_s390_fac_base[i] \|=
				4206	S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
				4207
				4208	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
				4209	}
				4210
				4211	static void __exit kvm_s390_exit(void)
				4212	{
				4213	kvm_exit();
				4214	}
				4215
				4216	module_init(kvm_s390_init);
				4217	module_exit(kvm_s390_exit);
				4218
				4219	/*
				4220	* Enable autoloading of the kvm module.
				4221	* Note that we add the module alias here instead of virt/kvm/kvm_main.c
				4222	* since x86 takes a different approach.
				4223	*/
				4224	#include <linux/miscdevice.h>
				4225	MODULE_ALIAS_MISCDEV(KVM_MINOR);
				4226	MODULE_ALIAS("devname:kvm");