// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/bug.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <trace/events/kvm.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include <linux/uaccess.h>
#include <asm/ptrace.h>
#include <asm/mman.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
#include <asm/sections.h>

#ifdef REQUIRES_VIRT
__asm__(".arch_extension virt");
#endif

DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);

/* Per-CPU variable containing the currently running vcpu. */
static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);

/* The VMID used in the VTTBR */
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u32 kvm_next_vmid;
static DEFINE_SPINLOCK(kvm_vmid_lock);

static bool vgic_present;

static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);

static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
{
	__this_cpu_write(kvm_arm_running_vcpu, vcpu);
}

DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);

/**
 * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
 * Must be called from non-preemptible context
 */
struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
{
	return __this_cpu_read(kvm_arm_running_vcpu);
}

/**
 * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.
 */
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
{
	return &kvm_arm_running_vcpu;
}

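/*
 * Only kick a vcpu if it was observed still running in guest mode when its
 * mode was flipped to EXITING_GUEST_MODE; otherwise no IPI is needed.
 */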
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

int kvm_arch_hardware_setup(void)
{
	return 0;
}

int kvm_arch_check_processor_compat(void)
{
	return 0;
}


/**
 * kvm_arch_init_vm - initializes a VM data structure
 * @kvm: pointer to the KVM struct
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int ret, cpu;

	ret = kvm_arm_setup_stage2(kvm, type);
	if (ret)
		return ret;

	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
	if (!kvm->arch.last_vcpu_ran)
		return -ENOMEM;

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;

	ret = kvm_alloc_stage2_pgd(kvm);
	if (ret)
		goto out_fail_alloc;

	ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
	if (ret)
		goto out_free_stage2_pgd;

	kvm_vgic_early_init(kvm);

	/* Mark the initial VMID generation invalid */
	kvm->arch.vmid.vmid_gen = 0;

	/* The maximum number of VCPUs is limited by the host's GIC model */
	kvm->arch.max_vcpus = vgic_present ?
				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;

	return ret;
out_free_stage2_pgd:
	kvm_free_stage2_pgd(kvm);
out_fail_alloc:
	free_percpu(kvm->arch.last_vcpu_ran);
	kvm->arch.last_vcpu_ran = NULL;
	return ret;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}


/**
 * kvm_arch_destroy_vm - destroy the VM data structure
 * @kvm: pointer to the KVM struct
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	int i;

	kvm_vgic_destroy(kvm);

	free_percpu(kvm->arch.last_vcpu_ran);
	kvm->arch.last_vcpu_ran = NULL;

	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		if (kvm->vcpus[i]) {
			kvm_arch_vcpu_free(kvm->vcpus[i]);
			kvm->vcpus[i] = NULL;
		}
	}
	atomic_set(&kvm->online_vcpus, 0);
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;
	switch (ext) {
	case KVM_CAP_IRQCHIP:
		r = vgic_present;
		break;
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_USER_MEMORY:
	case KVM_CAP_SYNC_MMU:
	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ARM_PSCI:
	case KVM_CAP_ARM_PSCI_0_2:
	case KVM_CAP_READONLY_MEM:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_VCPU_EVENTS:
	case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
		r = 1;
		break;
	case KVM_CAP_ARM_SET_DEVICE_ADDR:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
		r = num_online_cpus();
		break;
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_MAX_VCPU_ID;
		break;
	case KVM_CAP_MSI_DEVID:
		if (!kvm)
			r = -EINVAL;
		else
			r = kvm->arch.vgic.msis_require_devid;
		break;
	case KVM_CAP_ARM_USER_IRQ:
		/*
		 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
		 * (bump this number if adding more devices)
		 */
		r = 1;
		break;
	default:
		r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
		break;
	}
	return r;
}

long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	return -EINVAL;
}

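/*
 * With VHE the kvm structure is never mapped into Hyp, so it can come from
 * vmalloc; the non-VHE case keeps using kmalloc so the structure stays in
 * the kernel linear map that the Hyp mapping of it relies on.
 */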
struct kvm *kvm_arch_alloc_vm(void)
{
	if (!has_vhe())
		return kzalloc(sizeof(struct kvm), GFP_KERNEL);

	return vzalloc(sizeof(struct kvm));
}

void kvm_arch_free_vm(struct kvm *kvm)
{
	if (!has_vhe())
		kfree(kvm);
	else
		vfree(kvm);
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
	int err;
	struct kvm_vcpu *vcpu;

	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
		err = -EBUSY;
		goto out;
	}

	if (id >= kvm->arch.max_vcpus) {
		err = -EINVAL;
		goto out;
	}

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu) {
		err = -ENOMEM;
		goto out;
	}

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
	if (err)
		goto vcpu_uninit;

	return vcpu;
vcpu_uninit:
	kvm_vcpu_uninit(vcpu);
free_vcpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(err);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
}

void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
		static_branch_dec(&userspace_irqchip_in_use);

	kvm_mmu_free_memory_caches(vcpu);
	kvm_timer_vcpu_terminate(vcpu);
	kvm_pmu_vcpu_destroy(vcpu);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_free(vcpu);
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return kvm_timer_is_pending(vcpu);
}

void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	/*
	 * If we're about to block (most likely because we've just hit a
	 * WFI), we need to sync back the state of the GIC CPU interface
	 * so that we have the latest PMR and group enables. This ensures
	 * that kvm_arch_vcpu_runnable has up-to-date data to decide
	 * whether we have pending interrupts.
	 */
	preempt_disable();
	kvm_vgic_vmcr_sync(vcpu);
	preempt_enable();

	kvm_vgic_v4_enable_doorbell(vcpu);
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	kvm_vgic_v4_disable_doorbell(vcpu);
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	/* Force users to call KVM_ARM_VCPU_INIT */
	vcpu->arch.target = -1;
	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);

	/* Set up the timer */
	kvm_timer_vcpu_init(vcpu);

	kvm_pmu_vcpu_init(vcpu);

	kvm_arm_reset_debug_ptr(vcpu);

	return kvm_vgic_vcpu_init(vcpu);
}

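/*
 * Snapshot both halves (LO/HI) of one pointer-authentication key register
 * pair into the given sysreg array; a no-op when CONFIG_ARM64 is not set.
 */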
#ifdef CONFIG_ARM64
#define __ptrauth_save_key(regs, key)						\
({										\
	regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1);	\
	regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1);	\
})
#else
#define __ptrauth_save_key(regs, key)	do { } while (0)
#endif

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	int *last_ran;
	kvm_host_data_t *cpu_data;

	last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
	cpu_data = this_cpu_ptr(&kvm_host_data);

	/*
	 * We guarantee that both TLBs and I-cache are private to each
	 * vcpu. If detecting that a vcpu from the same VM has
	 * previously run on the same physical CPU, call into the
	 * hypervisor code to nuke the relevant contexts.
	 *
	 * We might get preempted before the vCPU actually runs, but
	 * over-invalidation doesn't affect correctness.
	 */
	if (*last_ran != vcpu->vcpu_id) {
		kvm_call_hyp(__kvm_flush_cpu_context, vcpu);
		*last_ran = vcpu->vcpu_id;
	}

	vcpu->cpu = cpu;
	vcpu->arch.host_cpu_context = &cpu_data->host_ctxt;

	kvm_arm_set_running_vcpu(vcpu);
	kvm_vgic_load(vcpu);
	kvm_timer_vcpu_load(vcpu);
	kvm_vcpu_load_sysregs(vcpu);
	kvm_arch_vcpu_load_fp(vcpu);
	kvm_vcpu_pmu_restore_guest(vcpu);

	if (single_task_running())
		vcpu_clear_wfe_traps(vcpu);
	else
		vcpu_set_wfe_traps(vcpu);

	if (vcpu_has_ptrauth(vcpu)) {
		struct kvm_cpu_context __maybe_unused *ctxt = vcpu->arch.host_cpu_context;

		__ptrauth_save_key(ctxt->sys_regs, APIA);
		__ptrauth_save_key(ctxt->sys_regs, APIB);
		__ptrauth_save_key(ctxt->sys_regs, APDA);
		__ptrauth_save_key(ctxt->sys_regs, APDB);
		__ptrauth_save_key(ctxt->sys_regs, APGA);

		vcpu_ptrauth_disable(vcpu);
	}
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_put_fp(vcpu);
	kvm_vcpu_put_sysregs(vcpu);
	kvm_timer_vcpu_put(vcpu);
	kvm_vgic_put(vcpu);
	kvm_vcpu_pmu_restore_host(vcpu);

	vcpu->cpu = -1;

	kvm_arm_set_running_vcpu(NULL);
}

static void vcpu_power_off(struct kvm_vcpu *vcpu)
{
	vcpu->arch.power_off = true;
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	if (vcpu->arch.power_off)
		mp_state->mp_state = KVM_MP_STATE_STOPPED;
	else
		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;

	return 0;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret = 0;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		vcpu->arch.power_off = false;
		break;
	case KVM_MP_STATE_STOPPED:
		vcpu_power_off(vcpu);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/**
 * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
 * @v: The VCPU pointer
 *
 * If the guest CPU is not waiting for interrupts or an interrupt line is
 * asserted, the CPU is by definition runnable.
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
	bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
	return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
		&& !v->arch.power_off && !v->arch.pause);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return vcpu_mode_priv(vcpu);
}

/* Just ensure a guest exit from a particular CPU */
static void exit_vm_noop(void *info)
{
}

void force_vm_exit(const cpumask_t *mask)
{
	preempt_disable();
	smp_call_function_many(mask, exit_vm_noop, NULL, true);
	preempt_enable();
}

/**
 * need_new_vmid_gen - check that the VMID is still valid
 * @vmid: The VMID to check
 *
 * return true if there is a new generation of VMIDs being used
 *
 * The hardware supports a limited set of values with the value zero reserved
 * for the host, so we check if an assigned value belongs to a previous
 * generation, which requires us to assign a new value. If we're the
 * first to use a VMID for the new generation, we must flush necessary caches
 * and TLBs on all CPUs.
 */
static bool need_new_vmid_gen(struct kvm_vmid *vmid)
{
	u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
	smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
	return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
}

/**
 * update_vmid - Update the vmid with a valid VMID for the current generation
 * @vmid: The stage-2 VMID information struct
 */
static void update_vmid(struct kvm_vmid *vmid)
{
	if (!need_new_vmid_gen(vmid))
		return;

	spin_lock(&kvm_vmid_lock);

	/*
	 * We need to re-check the vmid_gen here to ensure that if another vcpu
	 * already allocated a valid vmid for this vm, then this vcpu should
	 * use the same vmid.
	 */
	if (!need_new_vmid_gen(vmid)) {
		spin_unlock(&kvm_vmid_lock);
		return;
	}

	/* First user of a new VMID generation? */
	if (unlikely(kvm_next_vmid == 0)) {
		atomic64_inc(&kvm_vmid_gen);
		kvm_next_vmid = 1;

		/*
		 * On SMP we know no other CPUs can use this CPU's or each
		 * other's VMID after force_vm_exit returns since the
		 * kvm_vmid_lock blocks them from reentry to the guest.
		 */
		force_vm_exit(cpu_all_mask);
		/*
		 * Now broadcast TLB + ICACHE invalidation over the inner
		 * shareable domain to make sure all data structures are
		 * clean.
		 */
		kvm_call_hyp(__kvm_flush_vm_context);
	}

	vmid->vmid = kvm_next_vmid;
	kvm_next_vmid++;
	kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;

	smp_wmb();
	WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));

	spin_unlock(&kvm_vmid_lock);
}

static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	int ret = 0;

	if (likely(vcpu->arch.has_run_once))
		return 0;

	if (!kvm_arm_vcpu_is_finalized(vcpu))
		return -EPERM;

	vcpu->arch.has_run_once = true;

	kvm_arm_vcpu_init_debug(vcpu);

	if (likely(irqchip_in_kernel(kvm))) {
		/*
		 * Map the VGIC hardware resources before running a vcpu the
		 * first time on this VM.
		 */
		if (unlikely(!vgic_ready(kvm))) {
			ret = kvm_vgic_map_resources(kvm);
			if (ret)
				return ret;
		}
	} else {
		/*
		 * Tell the rest of the code that there are userspace irqchip
		 * VMs in the wild.
		 */
		static_branch_inc(&userspace_irqchip_in_use);
	}

	ret = kvm_timer_enable(vcpu);
	if (ret)
		return ret;

	ret = kvm_arm_pmu_v3_enable(vcpu);

	return ret;
}

bool kvm_arch_intc_initialized(struct kvm *kvm)
{
	return vgic_initialized(kvm);
}

void kvm_arm_halt_guest(struct kvm *kvm)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.pause = true;
	kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
}

void kvm_arm_resume_guest(struct kvm *kvm)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.pause = false;
		swake_up_one(kvm_arch_vcpu_wq(vcpu));
	}
}

static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
{
	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);

	swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
				       (!vcpu->arch.pause)));

	if (vcpu->arch.power_off || vcpu->arch.pause) {
		/* Awaken to handle a signal, request we sleep again later. */
		kvm_make_request(KVM_REQ_SLEEP, vcpu);
	}

	/*
	 * Make sure we will observe a potential reset request if we've
	 * observed a change to the power state. Pairs with the smp_wmb() in
	 * kvm_psci_vcpu_on().
	 */
	smp_rmb();
}

static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.target >= 0;
}

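/*
 * Handle any requests posted to the vcpu (sleep, pending reset, pending
 * virtual interrupt) before we commit to entering the guest.
 */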
static void check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
			vcpu_req_sleep(vcpu);

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_reset_vcpu(vcpu);

		/*
		 * Clear IRQ_PENDING requests that were made to guarantee
		 * that a VCPU sees new virtual interrupts.
		 */
		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
	}
}

/**
 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
 * @vcpu: The VCPU pointer
 * @run: The kvm_run structure pointer used for userspace state exchange
 *
 * This function is called through the VCPU_RUN ioctl called from user space. It
 * will execute VM code in a loop until the time slice for the process is used
 * or some emulation is needed from user space in which case the function will
 * return with return value 0 and with the kvm_run structure filled in with the
 * required data for the requested emulation.
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	int ret;

	if (unlikely(!kvm_vcpu_initialized(vcpu)))
		return -ENOEXEC;

	ret = kvm_vcpu_first_run_init(vcpu);
	if (ret)
		return ret;

	if (run->exit_reason == KVM_EXIT_MMIO) {
		ret = kvm_handle_mmio_return(vcpu, vcpu->run);
		if (ret)
			return ret;
	}

	if (run->immediate_exit)
		return -EINTR;

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/*
		 * Check conditions before entering the guest
		 */
		cond_resched();

		update_vmid(&vcpu->kvm->arch.vmid);

		check_vcpu_requests(vcpu);

		/*
		 * Preparing the interrupts to be injected also
		 * involves poking the GIC, which must be done in a
		 * non-preemptible context.
		 */
		preempt_disable();

		kvm_pmu_flush_hwstate(vcpu);

		local_irq_disable();

		kvm_vgic_flush_hwstate(vcpu);

		/*
		 * Exit if we have a signal pending so that we can deliver the
		 * signal to user space.
		 */
		if (signal_pending(current)) {
			ret = -EINTR;
			run->exit_reason = KVM_EXIT_INTR;
		}

		/*
		 * If we're using a userspace irqchip, then check if we need
		 * to tell a userspace irqchip about timer or PMU level
		 * changes and if so, exit to userspace (the actual level
		 * state gets updated in kvm_timer_update_run and
		 * kvm_pmu_update_run below).
		 */
		if (static_branch_unlikely(&userspace_irqchip_in_use)) {
			if (kvm_timer_should_notify_user(vcpu) ||
			    kvm_pmu_should_notify_user(vcpu)) {
				ret = -EINTR;
				run->exit_reason = KVM_EXIT_INTR;
			}
		}

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		smp_store_mb(vcpu->mode, IN_GUEST_MODE);

		if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu)) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			isb(); /* Ensure work in x_flush_hwstate is committed */
			kvm_pmu_sync_hwstate(vcpu);
			if (static_branch_unlikely(&userspace_irqchip_in_use))
				kvm_timer_sync_hwstate(vcpu);
			kvm_vgic_sync_hwstate(vcpu);
			local_irq_enable();
			preempt_enable();
			continue;
		}

		kvm_arm_setup_debug(vcpu);

		/**************************************************************
		 * Enter the guest
		 */
		trace_kvm_entry(*vcpu_pc(vcpu));
		guest_enter_irqoff();

		if (has_vhe()) {
			kvm_arm_vhe_guest_enter();
			ret = kvm_vcpu_run_vhe(vcpu);
			kvm_arm_vhe_guest_exit();
		} else {
			ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
		}

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;
		/*
		 * Back from guest
		 *************************************************************/

		kvm_arm_clear_debug(vcpu);

		/*
		 * We must sync the PMU state before the vgic state so
		 * that the vgic can properly sample the updated state of the
		 * interrupt line.
		 */
		kvm_pmu_sync_hwstate(vcpu);

		/*
		 * Sync the vgic state before syncing the timer state because
		 * the timer code needs to know if the virtual timer
		 * interrupts are active.
		 */
		kvm_vgic_sync_hwstate(vcpu);

		/*
		 * Sync the timer hardware state before enabling interrupts as
		 * we don't want vtimer interrupts to race with syncing the
		 * timer virtual interrupt state.
		 */
		if (static_branch_unlikely(&userspace_irqchip_in_use))
			kvm_timer_sync_hwstate(vcpu);

		kvm_arch_vcpu_ctxsync_fp(vcpu);

		/*
		 * We may have taken a host interrupt in HYP mode (ie
		 * while executing the guest). This interrupt is still
		 * pending, as we haven't serviced it yet!
		 *
		 * We're now back in SVC mode, with interrupts
		 * disabled. Enabling the interrupts now will have
		 * the effect of taking the interrupt again, in SVC
		 * mode this time.
		 */
		local_irq_enable();

		/*
		 * We do local_irq_enable() before calling guest_exit() so
		 * that if a timer interrupt hits while running the guest we
		 * account that tick as being spent in the guest. We enable
		 * preemption after calling guest_exit() so that if we get
		 * preempted we make sure ticks after that is not counted as
		 * guest time.
		 */
		guest_exit();
		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));

		/* Exit types that need handling before we can be preempted */
		handle_exit_early(vcpu, run, ret);

		preempt_enable();

		ret = handle_exit(vcpu, run, ret);
	}

	/* Tell userspace about in-kernel device output levels */
	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
		kvm_timer_update_run(vcpu);
		kvm_pmu_update_run(vcpu);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);
	return ret;
}

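/*
 * Drive the virtual IRQ or FIQ line of a vcpu by setting or clearing the
 * corresponding HCR_VI/HCR_VF bit, then kick the vcpu so the change is
 * picked up on the next world switch.
 */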
static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
{
	int bit_index;
	bool set;
	unsigned long *hcr;

	if (number == KVM_ARM_IRQ_CPU_IRQ)
		bit_index = __ffs(HCR_VI);
	else /* KVM_ARM_IRQ_CPU_FIQ */
		bit_index = __ffs(HCR_VF);

	hcr = vcpu_hcr(vcpu);
	if (level)
		set = test_and_set_bit(bit_index, hcr);
	else
		set = test_and_clear_bit(bit_index, hcr);

	/*
	 * If we didn't change anything, no need to wake up or kick other CPUs
	 */
	if (set == level)
		return 0;

	/*
	 * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
	 * trigger a world-switch round on the running physical CPU to set the
	 * virtual IRQ/FIQ fields in the HCR appropriately.
	 */
	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
	kvm_vcpu_kick(vcpu);

	return 0;
}

int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
			  bool line_status)
{
	u32 irq = irq_level->irq;
	unsigned int irq_type, vcpu_idx, irq_num;
	int nrcpus = atomic_read(&kvm->online_vcpus);
	struct kvm_vcpu *vcpu = NULL;
	bool level = irq_level->level;

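	/*
	 * The irq field encodes the interrupt type, the target vcpu index
	 * (split across the VCPU and VCPU2 fields for the IRQ_LINE_LAYOUT_2
	 * encoding) and the interrupt number, per the KVM_ARM_IRQ_* layout.
	 */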
	irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
	vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
	vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
	irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;

	trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);

	switch (irq_type) {
	case KVM_ARM_IRQ_TYPE_CPU:
		if (irqchip_in_kernel(kvm))
			return -ENXIO;

		if (vcpu_idx >= nrcpus)
			return -EINVAL;

		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
		if (!vcpu)
			return -EINVAL;

		if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
			return -EINVAL;

		return vcpu_interrupt_line(vcpu, irq_num, level);
	case KVM_ARM_IRQ_TYPE_PPI:
		if (!irqchip_in_kernel(kvm))
			return -ENXIO;

		if (vcpu_idx >= nrcpus)
			return -EINVAL;

		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
		if (!vcpu)
			return -EINVAL;

		if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
			return -EINVAL;

		return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
	case KVM_ARM_IRQ_TYPE_SPI:
		if (!irqchip_in_kernel(kvm))
			return -ENXIO;

		if (irq_num < VGIC_NR_PRIVATE_IRQS)
			return -EINVAL;

		return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
	}

	return -EINVAL;
}

static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
			       const struct kvm_vcpu_init *init)
{
	unsigned int i, ret;
	int phys_target = kvm_target_cpu();

	if (init->target != phys_target)
		return -EINVAL;

	/*
	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
	 * use the same target.
	 */
	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
		return -EINVAL;

	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
	for (i = 0; i < sizeof(init->features) * 8; i++) {
		bool set = (init->features[i / 32] & (1 << (i % 32)));

		if (set && i >= KVM_VCPU_MAX_FEATURES)
			return -ENOENT;

		/*
		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
		 * use the same feature set.
		 */
		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
		    test_bit(i, vcpu->arch.features) != set)
			return -EINVAL;

		if (set)
			set_bit(i, vcpu->arch.features);
	}

	vcpu->arch.target = phys_target;

	/* Now we know what it is, we can reset it. */
	ret = kvm_reset_vcpu(vcpu);
	if (ret) {
		vcpu->arch.target = -1;
		bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
	}

	return ret;
}

static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
					 struct kvm_vcpu_init *init)
{
	int ret;

	ret = kvm_vcpu_set_target(vcpu, init);
	if (ret)
		return ret;

	/*
	 * Ensure a rebooted VM will fault in RAM pages and detect if the
	 * guest MMU is turned off and flush the caches as needed.
	 */
	if (vcpu->arch.has_run_once)
		stage2_unmap_vm(vcpu->kvm);

	vcpu_reset_hcr(vcpu);

	/*
	 * Handle the "start in power-off" case.
	 */
	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
		vcpu_power_off(vcpu);
	else
		vcpu->arch.power_off = false;

	return 0;
}

static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
				 struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->group) {
	default:
		ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
		break;
	}

	return ret;
}

static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
				 struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->group) {
	default:
		ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
		break;
	}

	return ret;
}

static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
				 struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->group) {
	default:
		ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
		break;
	}

	return ret;
}

static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
				   struct kvm_vcpu_events *events)
{
	memset(events, 0, sizeof(*events));

	return __kvm_arm_vcpu_get_events(vcpu, events);
}

static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
				   struct kvm_vcpu_events *events)
{
	int i;

	/* check whether the reserved field is zero */
	for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
		if (events->reserved[i])
			return -EINVAL;

	/* check whether the pad field is zero */
	for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
		if (events->exception.pad[i])
			return -EINVAL;

	return __kvm_arm_vcpu_set_events(vcpu, events);
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	long r;

	switch (ioctl) {
	case KVM_ARM_VCPU_INIT: {
		struct kvm_vcpu_init init;

		r = -EFAULT;
		if (copy_from_user(&init, argp, sizeof(init)))
			break;

		r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
		break;
	}
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -ENOEXEC;
		if (unlikely(!kvm_vcpu_initialized(vcpu)))
			break;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;

		/*
		 * We could owe a reset due to PSCI. Handle the pending reset
		 * here to ensure userspace register accesses are ordered after
		 * the reset.
		 */
		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_reset_vcpu(vcpu);

		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arm_set_reg(vcpu, &reg);
		else
			r = kvm_arm_get_reg(vcpu, &reg);
		break;
	}
	case KVM_GET_REG_LIST: {
		struct kvm_reg_list __user *user_list = argp;
		struct kvm_reg_list reg_list;
		unsigned n;

		r = -ENOEXEC;
		if (unlikely(!kvm_vcpu_initialized(vcpu)))
			break;

		r = -EPERM;
		if (!kvm_arm_vcpu_is_finalized(vcpu))
			break;

		r = -EFAULT;
		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
			break;
		n = reg_list.n;
		reg_list.n = kvm_arm_num_regs(vcpu);
		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
			break;
		r = -E2BIG;
		if (n < reg_list.n)
			break;
		r = kvm_arm_copy_reg_indices(vcpu, user_list->reg);
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, argp, sizeof(attr)))
			break;
		r = kvm_arm_vcpu_set_attr(vcpu, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, argp, sizeof(attr)))
			break;
		r = kvm_arm_vcpu_get_attr(vcpu, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, argp, sizeof(attr)))
			break;
		r = kvm_arm_vcpu_has_attr(vcpu, &attr);
		break;
	}
	case KVM_GET_VCPU_EVENTS: {
		struct kvm_vcpu_events events;

		if (kvm_arm_vcpu_get_events(vcpu, &events))
			return -EINVAL;

		if (copy_to_user(argp, &events, sizeof(events)))
			return -EFAULT;

		return 0;
	}
	case KVM_SET_VCPU_EVENTS: {
		struct kvm_vcpu_events events;

		if (copy_from_user(&events, argp, sizeof(events)))
			return -EFAULT;

		return kvm_arm_vcpu_set_events(vcpu, &events);
	}
	case KVM_ARM_VCPU_FINALIZE: {
		int what;

		if (!kvm_vcpu_initialized(vcpu))
			return -ENOEXEC;

		if (get_user(what, (const int __user *)argp))
			return -EFAULT;

		return kvm_arm_vcpu_finalize(vcpu, what);
	}
	default:
		r = -EINVAL;
	}

	return r;
}

/**
 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
 * @kvm: kvm instance
 * @log: slot id and address to which we copy the log
 *
 * Steps 1-4 below provide general overview of dirty page logging. See
 * kvm_get_dirty_log_protect() function description for additional details.
 *
 * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
 * always flush the TLB (step 4) even if previous step failed and the dirty
 * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
 * does not preclude user space subsequent dirty log read. Flushing TLB ensures
 * writes will be marked dirty for next log read.
 *
 *   1. Take a snapshot of the bit and clear it if needed.
 *   2. Write protect the corresponding page.
 *   3. Copy the snapshot to the userspace.
 *   4. Flush TLB's if needed.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
	bool flush = false;
	int r;

	mutex_lock(&kvm->slots_lock);

	r = kvm_get_dirty_log_protect(kvm, log, &flush);

	if (flush)
		kvm_flush_remote_tlbs(kvm);

	mutex_unlock(&kvm->slots_lock);
	return r;
}

int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
{
	bool flush = false;
	int r;

	mutex_lock(&kvm->slots_lock);

	r = kvm_clear_dirty_log_protect(kvm, log, &flush);

	if (flush)
		kvm_flush_remote_tlbs(kvm);

	mutex_unlock(&kvm->slots_lock);
	return r;
}

static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
					struct kvm_arm_device_addr *dev_addr)
{
	unsigned long dev_id, type;

	dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
		KVM_ARM_DEVICE_ID_SHIFT;
	type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
		KVM_ARM_DEVICE_TYPE_SHIFT;

	switch (dev_id) {
	case KVM_ARM_DEVICE_VGIC_V2:
		if (!vgic_present)
			return -ENXIO;
		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
	default:
		return -ENODEV;
	}
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_CREATE_IRQCHIP: {
		int ret;
		if (!vgic_present)
			return -ENXIO;
		mutex_lock(&kvm->lock);
		ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
		mutex_unlock(&kvm->lock);
		return ret;
	}
	case KVM_ARM_SET_DEVICE_ADDR: {
		struct kvm_arm_device_addr dev_addr;

		if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
			return -EFAULT;
		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
	}
	case KVM_ARM_PREFERRED_TARGET: {
		int err;
		struct kvm_vcpu_init init;

		err = kvm_vcpu_preferred_target(&init);
		if (err)
			return err;

		if (copy_to_user(argp, &init, sizeof(init)))
			return -EFAULT;

		return 0;
	}
	default:
		return -EINVAL;
	}
}

static void cpu_init_hyp_mode(void *dummy)
{
	phys_addr_t pgd_ptr;
	unsigned long hyp_stack_ptr;
	unsigned long stack_page;
	unsigned long vector_ptr;

	/* Switch from the HYP stub to our own HYP init vector */
	__hyp_set_vectors(kvm_get_idmap_vector());

	pgd_ptr = kvm_mmu_get_httbr();
	stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
	hyp_stack_ptr = stack_page + PAGE_SIZE;
	vector_ptr = (unsigned long)kvm_get_hyp_vector();

	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
	__cpu_init_stage2();
}

static void cpu_hyp_reset(void)
{
	if (!is_kernel_in_hyp_mode())
		__hyp_reset_vectors();
}

static void cpu_hyp_reinit(void)
{
	kvm_init_host_cpu_context(&this_cpu_ptr(&kvm_host_data)->host_ctxt);

	cpu_hyp_reset();

	if (is_kernel_in_hyp_mode())
		kvm_timer_init_vhe();
	else
		cpu_init_hyp_mode(NULL);

	kvm_arm_init_debug();

	if (vgic_present)
		kvm_vgic_init_cpu_hardware();
}

static void _kvm_arch_hardware_enable(void *discard)
{
	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
		cpu_hyp_reinit();
		__this_cpu_write(kvm_arm_hardware_enabled, 1);
	}
}

int kvm_arch_hardware_enable(void)
{
	_kvm_arch_hardware_enable(NULL);
	return 0;
}

static void _kvm_arch_hardware_disable(void *discard)
{
	if (__this_cpu_read(kvm_arm_hardware_enabled)) {
		cpu_hyp_reset();
		__this_cpu_write(kvm_arm_hardware_enabled, 0);
	}
}

void kvm_arch_hardware_disable(void)
{
	_kvm_arch_hardware_disable(NULL);
}

#ifdef CONFIG_CPU_PM
static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
				    unsigned long cmd,
				    void *v)
{
	/*
	 * kvm_arm_hardware_enabled is left with its old value over
	 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
	 * re-enable hyp.
	 */
	switch (cmd) {
	case CPU_PM_ENTER:
		if (__this_cpu_read(kvm_arm_hardware_enabled))
			/*
			 * don't update kvm_arm_hardware_enabled here
			 * so that the hardware will be re-enabled
			 * when we resume. See below.
			 */
			cpu_hyp_reset();

		return NOTIFY_OK;
	case CPU_PM_ENTER_FAILED:
	case CPU_PM_EXIT:
		if (__this_cpu_read(kvm_arm_hardware_enabled))
			/* The hardware was enabled before suspend. */
			cpu_hyp_reinit();

		return NOTIFY_OK;

	default:
		return NOTIFY_DONE;
	}
}

static struct notifier_block hyp_init_cpu_pm_nb = {
	.notifier_call = hyp_init_cpu_pm_notifier,
};

static void __init hyp_cpu_pm_init(void)
{
	cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
}
static void __init hyp_cpu_pm_exit(void)
{
	cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
}
#else
static inline void hyp_cpu_pm_init(void)
{
}
static inline void hyp_cpu_pm_exit(void)
{
}
#endif

static int init_common_resources(void)
{
	kvm_set_ipa_limit();

	return 0;
}

static int init_subsystems(void)
{
	int err = 0;

	/*
	 * Enable hardware so that subsystem initialisation can access EL2.
	 */
	on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);

	/*
	 * Register CPU lower-power notifier
	 */
	hyp_cpu_pm_init();

	/*
	 * Init HYP view of VGIC
	 */
	err = kvm_vgic_hyp_init();
	switch (err) {
	case 0:
		vgic_present = true;
		break;
	case -ENODEV:
	case -ENXIO:
		vgic_present = false;
		err = 0;
		break;
	default:
		goto out;
	}

	/*
	 * Init HYP architected timer support
	 */
	err = kvm_timer_hyp_init(vgic_present);
	if (err)
		goto out;

	kvm_perf_init();
	kvm_coproc_table_init();

out:
	on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);

	return err;
}

static void teardown_hyp_mode(void)
{
	int cpu;

	free_hyp_pgds();
	for_each_possible_cpu(cpu)
		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
	hyp_cpu_pm_exit();
}

/**
 * Inits Hyp-mode on all online CPUs
 */
static int init_hyp_mode(void)
{
	int cpu;
	int err = 0;

	/*
	 * Allocate Hyp PGD and setup Hyp identity mapping
	 */
	err = kvm_mmu_init();
	if (err)
		goto out_err;

	/*
	 * Allocate stack pages for Hypervisor-mode
	 */
	for_each_possible_cpu(cpu) {
		unsigned long stack_page;

		stack_page = __get_free_page(GFP_KERNEL);
		if (!stack_page) {
			err = -ENOMEM;
			goto out_err;
		}

		per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
	}

	/*
	 * Map the Hyp-code called directly from the host
	 */
	err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
				  kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
	if (err) {
		kvm_err("Cannot map world-switch code\n");
		goto out_err;
	}

	err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
				  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
	if (err) {
		kvm_err("Cannot map rodata section\n");
		goto out_err;
	}

	err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
				  kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
	if (err) {
		kvm_err("Cannot map bss section\n");
		goto out_err;
	}

	err = kvm_map_vectors();
	if (err) {
		kvm_err("Cannot map vectors\n");
		goto out_err;
	}

	/*
	 * Map the Hyp stack pages
	 */
	for_each_possible_cpu(cpu) {
		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
					  PAGE_HYP);

		if (err) {
			kvm_err("Cannot map hyp stack\n");
			goto out_err;
		}
	}

	for_each_possible_cpu(cpu) {
		kvm_host_data_t *cpu_data;

		cpu_data = per_cpu_ptr(&kvm_host_data, cpu);
		err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP);

		if (err) {
			kvm_err("Cannot map host CPU state: %d\n", err);
			goto out_err;
		}
	}

	err = hyp_map_aux_data();
	if (err)
		kvm_err("Cannot map host auxiliary data: %d\n", err);

	return 0;

out_err:
	teardown_hyp_mode();
	kvm_err("error initializing Hyp mode: %d\n", err);
	return err;
}

static void check_kvm_target_cpu(void *ret)
{
	*(int *)ret = kvm_target_cpu();
}

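/*
 * Look up the vcpu whose MPIDR affinity fields match the given value,
 * e.g. when resolving the target of a PSCI CPU_ON call.
 */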
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
	struct kvm_vcpu *vcpu;
	int i;

	mpidr &= MPIDR_HWID_BITMASK;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
			return vcpu;
	}
	return NULL;
}

bool kvm_arch_has_irq_bypass(void)
{
	return true;
}

int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
				     struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
					  &irqfd->irq_entry);
}
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
				      struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
				     &irqfd->irq_entry);
}

void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_arm_halt_guest(irqfd->kvm);
}

void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_arm_resume_guest(irqfd->kvm);
}

/**
 * Initialize Hyp-mode and memory mappings on all CPUs.
 */
int kvm_arch_init(void *opaque)
{
	int err;
	int ret, cpu;
	bool in_hyp_mode;

	if (!is_hyp_mode_available()) {
		kvm_info("HYP mode not available\n");
		return -ENODEV;
	}

	in_hyp_mode = is_kernel_in_hyp_mode();

	if (!in_hyp_mode && kvm_arch_requires_vhe()) {
		kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n");
		return -ENODEV;
	}

	for_each_online_cpu(cpu) {
		smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
		if (ret < 0) {
			kvm_err("Error, CPU %d not supported!\n", cpu);
			return -ENODEV;
		}
	}

	err = init_common_resources();
	if (err)
		return err;

	err = kvm_arm_init_sve();
	if (err)
		return err;

	if (!in_hyp_mode) {
		err = init_hyp_mode();
		if (err)
			goto out_err;
	}

	err = init_subsystems();
	if (err)
		goto out_hyp;

	if (in_hyp_mode)
		kvm_info("VHE mode initialized successfully\n");
	else
		kvm_info("Hyp mode initialized successfully\n");

	return 0;

out_hyp:
	if (!in_hyp_mode)
		teardown_hyp_mode();
out_err:
	return err;
}

/* NOP: Compiling as a module not supported */
void kvm_arch_exit(void)
{
	kvm_perf_teardown();
}

static int arm_init(void)
{
	int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	return rc;
}

module_init(arm_init);