Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 711fca9..549591d 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -7,7 +7,7 @@
menuconfig VIRTUALIZATION
bool "Virtualization"
- ---help---
+ help
Say Y here to get to see options for using your Linux host to run
other operating systems inside virtual machines (guests).
This option alone does not add any kernel code.
@@ -54,7 +54,7 @@
select KVM
select KVM_BOOK3S_32_HANDLER
select KVM_BOOK3S_PR_POSSIBLE
- ---help---
+ help
Support running unmodified book3s_32 guest kernels
in virtual machines on book3s_32 host processors.
@@ -70,7 +70,7 @@
select KVM
select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV)
- ---help---
+ help
Support running unmodified book3s_64 and book3s_32 guest kernels
in virtual machines on book3s_64 host processors.
@@ -85,7 +85,7 @@
select KVM_BOOK3S_HV_POSSIBLE
select MMU_NOTIFIER
select CMA
- ---help---
+ help
Support running unmodified book3s_64 guest kernels in
virtual machines on POWER7 and newer processors that have
hypervisor mode available to the host.
@@ -104,7 +104,7 @@
tristate "KVM support without using hypervisor mode in host"
depends on KVM_BOOK3S_64
select KVM_BOOK3S_PR_POSSIBLE
- ---help---
+ help
Support running guest kernels in virtual machines on processors
without using hypervisor mode in the host, by running the
guest in user mode (problem state) and emulating all
@@ -119,7 +119,7 @@
config KVM_BOOK3S_HV_EXIT_TIMING
bool "Detailed timing for hypervisor real-mode code"
depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
- ---help---
+ help
Calculate time taken for each vcpu in the real-mode guest entry,
exit, and interrupt handling code, plus time spent in the guest
and in nap mode due to idle (cede) while other threads are still
@@ -136,7 +136,7 @@
config KVM_EXIT_TIMING
bool "Detailed exit timing"
depends on KVM_E500V2 || KVM_E500MC
- ---help---
+ help
Calculate elapsed time for every exit/enter cycle. A per-vcpu
report is available in debugfs kvm/vm#_vcpu#_timing.
The overhead is relatively small, however it is not recommended for
@@ -150,7 +150,7 @@
select KVM
select KVM_MMIO
select MMU_NOTIFIER
- ---help---
+ help
Support running unmodified E500 guest kernels in virtual machines on
E500v2 host processors.
@@ -166,7 +166,7 @@
select KVM_MMIO
select KVM_BOOKE_HV
select MMU_NOTIFIER
- ---help---
+ help
Support running unmodified E500MC/E5500/E6500 guest kernels in
virtual machines on E500MC/E5500/E6500 host processors.
@@ -194,7 +194,7 @@
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
default y
- ---help---
+ help
Include support for the XICS (eXternal Interrupt Controller
Specification) interrupt controller architecture used on
IBM POWER (pSeries) servers.
@@ -204,6 +204,4 @@
default y
depends on KVM_XICS && PPC_XIVE_NATIVE && KVM_BOOK3S_HV_POSSIBLE
-source "drivers/vhost/Kconfig"
-
endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 4c67cc7..2bfeaa1 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -71,6 +71,9 @@
book3s_64_mmu_radix.o \
book3s_hv_nested.o
+kvm-hv-$(CONFIG_PPC_UV) += \
+ book3s_hv_uvmem.o
+
kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
book3s_hv_tm.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 1ff971f..44bf567 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -36,65 +36,41 @@
#include "book3s.h"
#include "trace.h"
-#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
-#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
-
/* #define EXIT_DEBUG */
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "exits", VCPU_STAT(sum_exits) },
- { "mmio", VCPU_STAT(mmio_exits) },
- { "sig", VCPU_STAT(signal_exits) },
- { "sysc", VCPU_STAT(syscall_exits) },
- { "inst_emu", VCPU_STAT(emulated_inst_exits) },
- { "dec", VCPU_STAT(dec_exits) },
- { "ext_intr", VCPU_STAT(ext_intr_exits) },
- { "queue_intr", VCPU_STAT(queue_intr) },
- { "halt_poll_success_ns", VCPU_STAT(halt_poll_success_ns) },
- { "halt_poll_fail_ns", VCPU_STAT(halt_poll_fail_ns) },
- { "halt_wait_ns", VCPU_STAT(halt_wait_ns) },
- { "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
- { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
- { "halt_successful_wait", VCPU_STAT(halt_successful_wait) },
- { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
- { "pf_storage", VCPU_STAT(pf_storage) },
- { "sp_storage", VCPU_STAT(sp_storage) },
- { "pf_instruc", VCPU_STAT(pf_instruc) },
- { "sp_instruc", VCPU_STAT(sp_instruc) },
- { "ld", VCPU_STAT(ld) },
- { "ld_slow", VCPU_STAT(ld_slow) },
- { "st", VCPU_STAT(st) },
- { "st_slow", VCPU_STAT(st_slow) },
- { "pthru_all", VCPU_STAT(pthru_all) },
- { "pthru_host", VCPU_STAT(pthru_host) },
- { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) },
- { "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) },
- { "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) },
+ VCPU_STAT("exits", sum_exits),
+ VCPU_STAT("mmio", mmio_exits),
+ VCPU_STAT("sig", signal_exits),
+ VCPU_STAT("sysc", syscall_exits),
+ VCPU_STAT("inst_emu", emulated_inst_exits),
+ VCPU_STAT("dec", dec_exits),
+ VCPU_STAT("ext_intr", ext_intr_exits),
+ VCPU_STAT("queue_intr", queue_intr),
+ VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ VCPU_STAT("halt_wait_ns", halt_wait_ns),
+ VCPU_STAT("halt_successful_poll", halt_successful_poll),
+ VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+ VCPU_STAT("halt_successful_wait", halt_successful_wait),
+ VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+ VCPU_STAT("halt_wakeup", halt_wakeup),
+ VCPU_STAT("pf_storage", pf_storage),
+ VCPU_STAT("sp_storage", sp_storage),
+ VCPU_STAT("pf_instruc", pf_instruc),
+ VCPU_STAT("sp_instruc", sp_instruc),
+ VCPU_STAT("ld", ld),
+ VCPU_STAT("ld_slow", ld_slow),
+ VCPU_STAT("st", st),
+ VCPU_STAT("st_slow", st_slow),
+ VCPU_STAT("pthru_all", pthru_all),
+ VCPU_STAT("pthru_host", pthru_host),
+ VCPU_STAT("pthru_bad_aff", pthru_bad_aff),
+ VM_STAT("largepages_2M", num_2M_pages, .mode = 0444),
+ VM_STAT("largepages_1G", num_1G_pages, .mode = 0444),
{ NULL }
};
-void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
- ulong pc = kvmppc_get_pc(vcpu);
- ulong lr = kvmppc_get_lr(vcpu);
- if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
- kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
- if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
- kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
- vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
- }
-}
-EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real);
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
- if (!is_kvmppc_hv_enabled(vcpu->kvm))
- return to_book3s(vcpu)->hior;
- return 0;
-}
-
static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
unsigned long pending_now, unsigned long old_pending)
{
@@ -134,11 +110,7 @@
void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
{
- kvmppc_unfixup_split_real(vcpu);
- kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
- kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags);
- kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
- vcpu->arch.mmu.reset_msr(vcpu);
+ vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags);
}
static int kvmppc_book3s_vec2irqprio(unsigned int vec)
@@ -496,11 +468,6 @@
}
EXPORT_SYMBOL_GPL(kvmppc_load_last_inst);
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
{
return 0;
@@ -591,12 +558,12 @@
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
@@ -788,9 +755,9 @@
}
EXPORT_SYMBOL_GPL(kvmppc_set_msr);
-int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)
{
- return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu);
+ return vcpu->kvm->arch.kvm_ops->vcpu_run(vcpu);
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
@@ -814,9 +781,9 @@
kvm_vcpu_kick(vcpu);
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu)
{
- return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+ return vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu);
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
@@ -829,21 +796,19 @@
return vcpu->kvm->arch.kvm_ops->check_requests(vcpu);
}
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+
+}
+
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
}
-void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
- kvm->arch.kvm_ops->free_memslot(free, dont);
-}
-
-int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return kvm->arch.kvm_ops->create_memslot(slot, npages);
+ kvm->arch.kvm_ops->free_memslot(slot);
}
void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
@@ -853,9 +818,11 @@
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
{
- return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem);
+ return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem,
+ change);
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
@@ -889,11 +856,6 @@
return 0;
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
-{
- vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
-}
-
int kvmppc_core_init_vm(struct kvm *kvm)
{
@@ -917,13 +879,15 @@
#ifdef CONFIG_KVM_XICS
/*
- * Free the XIVE devices which are not directly freed by the
+ * Free the XIVE and XICS devices which are not directly freed by the
* device 'release' method
*/
kfree(kvm->arch.xive_devices.native);
kvm->arch.xive_devices.native = NULL;
kfree(kvm->arch.xive_devices.xics_on_xive);
kvm->arch.xive_devices.xics_on_xive = NULL;
+ kfree(kvm->arch.xics_device);
+ kvm->arch.xics_device = NULL;
#endif /* CONFIG_KVM_XICS */
}
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 2ef1311..9b6323e 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -16,8 +16,9 @@
extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
+extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
-extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
int sprn, ulong spr_val);
@@ -32,4 +33,7 @@
static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {}
#endif
+extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr);
+extern void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
+
#endif
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 18f244a..3fbd570 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -90,11 +90,6 @@
return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16);
}
-static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
-{
- kvmppc_set_msr(vcpu, 0);
-}
-
static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
u32 sre, gva_t eaddr,
bool primary)
@@ -239,7 +234,7 @@
case 2:
case 6:
pte->may_write = true;
- /* fall through */
+ fallthrough;
case 3:
case 5:
case 7:
@@ -406,7 +401,6 @@
mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin;
mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin;
mmu->xlate = kvmppc_mmu_book3s_32_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr;
mmu->tlbie = kvmppc_mmu_book3s_32_tlbie;
mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid;
mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp;
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index d4cb3bc..e8e7b2c 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -356,7 +356,7 @@
/* From mm/mmu_context_hash32.c */
#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff)
-int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
+int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
int err;
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 5f63a5f..26b8b27 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -24,20 +24,6 @@
#define dprintk(X...) do { } while(0)
#endif
-static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
-{
- unsigned long msr = vcpu->arch.intr_msr;
- unsigned long cur_msr = kvmppc_get_msr(vcpu);
-
- /* If transactional, change to suspend mode on IRQ delivery */
- if (MSR_TM_TRANSACTIONAL(cur_msr))
- msr |= MSR_TS_S;
- else
- msr |= cur_msr & MSR_TS_MASK;
-
- kvmppc_set_msr(vcpu, msr);
-}
-
static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
struct kvm_vcpu *vcpu,
gva_t eaddr)
@@ -325,7 +311,7 @@
case 2:
case 6:
gpte->may_write = true;
- /* fall through */
+ fallthrough;
case 3:
case 5:
case 7:
@@ -676,7 +662,6 @@
mmu->slbie = kvmppc_mmu_book3s_64_slbie;
mmu->slbia = kvmppc_mmu_book3s_64_slbia;
mmu->xlate = kvmppc_mmu_book3s_64_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr;
mmu->tlbie = kvmppc_mmu_book3s_64_tlbie;
mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid;
mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 044dd49..e452158 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -384,7 +384,7 @@
__destroy_context(to_book3s(vcpu)->context_id[0]);
}
-int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
+int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
int err;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 9a75f0e..38ea396 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -260,11 +260,15 @@
if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
return -EINVAL;
- /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
host_lpid = 0;
if (cpu_has_feature(CPU_FTR_HVMODE))
host_lpid = mfspr(SPRN_LPID);
- rsvd_lpid = LPID_RSVD;
+
+ /* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ rsvd_lpid = LPID_RSVD;
+ else
+ rsvd_lpid = LPID_RSVD_POWER7;
kvmppc_init_lpid(rsvd_lpid + 1);
@@ -275,29 +279,16 @@
return 0;
}
-static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
-{
- unsigned long msr = vcpu->arch.intr_msr;
-
- /* If transactional, change to suspend mode on IRQ delivery */
- if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
- msr |= MSR_TS_S;
- else
- msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
- kvmppc_set_msr(vcpu, msr);
-}
-
static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret)
{
long ret;
- /* Protect linux PTE lookup from page table destruction */
- rcu_read_lock_sched(); /* this disables preemption too */
+ preempt_disable();
ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
- current->mm->pgd, false, pte_idx_ret);
- rcu_read_unlock_sched();
+ kvm->mm->pgd, false, pte_idx_ret);
+ preempt_enable();
if (ret == H_TOO_HARD) {
/* this can't happen */
pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
@@ -425,7 +416,7 @@
return (instr & mask) != 0;
}
-int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu,
unsigned long gpa, gva_t ea, int is_store)
{
u32 last_inst;
@@ -485,10 +476,10 @@
vcpu->arch.paddr_accessed = gpa;
vcpu->arch.vaddr_accessed = ea;
- return kvmppc_emulate_mmio(run, vcpu);
+ return kvmppc_emulate_mmio(vcpu);
}
-int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr)
{
struct kvm *kvm = vcpu->kvm;
@@ -497,20 +488,21 @@
__be64 *hptep;
unsigned long mmu_seq, psize, pte_size;
unsigned long gpa_base, gfn_base;
- unsigned long gpa, gfn, hva, pfn;
+ unsigned long gpa, gfn, hva, pfn, hpa;
struct kvm_memory_slot *memslot;
unsigned long *rmap;
struct revmap_entry *rev;
- struct page *page, *pages[1];
- long index, ret, npages;
+ struct page *page;
+ long index, ret;
bool is_ci;
- unsigned int writing, write_ok;
- struct vm_area_struct *vma;
+ bool writing, write_ok;
+ unsigned int shift;
unsigned long rcbits;
long mmio_update;
+ pte_t pte, *ptep;
if (kvm_is_radix(kvm))
- return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
+ return kvmppc_book3s_radix_page_fault(vcpu, ea, dsisr);
/*
* Real-mode code has already searched the HPT and found the
@@ -530,7 +522,7 @@
gpa_base = r & HPTE_R_RPN & ~(psize - 1);
gfn_base = gpa_base >> PAGE_SHIFT;
gpa = gpa_base | (ea & (psize - 1));
- return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+ return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
dsisr & DSISR_ISSTORE);
}
}
@@ -566,7 +558,7 @@
/* No memslot means it's an emulated MMIO region */
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
- return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+ return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
dsisr & DSISR_ISSTORE);
/*
@@ -581,59 +573,63 @@
smp_rmb();
ret = -EFAULT;
- is_ci = false;
- pfn = 0;
page = NULL;
- pte_size = PAGE_SIZE;
writing = (dsisr & DSISR_ISSTORE) != 0;
/* If writing != 0, then the HPTE must allow writing, if we get here */
write_ok = writing;
hva = gfn_to_hva_memslot(memslot, gfn);
- npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
- if (npages < 1) {
- /* Check if it's an I/O mapping */
- down_read(¤t->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
- if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
- (vma->vm_flags & VM_PFNMAP)) {
- pfn = vma->vm_pgoff +
- ((hva - vma->vm_start) >> PAGE_SHIFT);
- pte_size = psize;
- is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
- write_ok = vma->vm_flags & VM_WRITE;
- }
- up_read(¤t->mm->mmap_sem);
- if (!pfn)
- goto out_put;
+
+ /*
+ * Do a fast check first, since __gfn_to_pfn_memslot doesn't
+ * do it with !atomic && !async, which is how we call it.
+ * We always ask for write permission since the common case
+ * is that the page is writable.
+ */
+ if (get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
+ write_ok = true;
} else {
- page = pages[0];
- pfn = page_to_pfn(page);
- if (PageHuge(page)) {
- page = compound_head(page);
- pte_size <<= compound_order(page);
- }
- /* if the guest wants write access, see if that is OK */
- if (!writing && hpte_is_writable(r)) {
- pte_t *ptep, pte;
- unsigned long flags;
- /*
- * We need to protect against page table destruction
- * hugepage split and collapse.
- */
- local_irq_save(flags);
- ptep = find_current_mm_pte(current->mm->pgd,
- hva, NULL, NULL);
- if (ptep) {
- pte = kvmppc_read_update_linux_pte(ptep, 1);
- if (__pte_write(pte))
- write_ok = 1;
- }
- local_irq_restore(flags);
+ /* Call KVM generic code to do the slow-path check */
+ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+ writing, &write_ok);
+ if (is_error_noslot_pfn(pfn))
+ return -EFAULT;
+ page = NULL;
+ if (pfn_valid(pfn)) {
+ page = pfn_to_page(pfn);
+ if (PageReserved(page))
+ page = NULL;
}
}
+ /*
+ * Read the PTE from the process' radix tree and use that
+ * so we get the shift and attribute bits.
+ */
+ spin_lock(&kvm->mmu_lock);
+ ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
+ pte = __pte(0);
+ if (ptep)
+ pte = READ_ONCE(*ptep);
+ spin_unlock(&kvm->mmu_lock);
+ /*
+ * If the PTE disappeared temporarily due to a THP
+ * collapse, just return and let the guest try again.
+ */
+ if (!pte_present(pte)) {
+ if (page)
+ put_page(page);
+ return RESUME_GUEST;
+ }
+ hpa = pte_pfn(pte) << PAGE_SHIFT;
+ pte_size = PAGE_SIZE;
+ if (shift)
+ pte_size = 1ul << shift;
+ is_ci = pte_ci(pte);
+
if (psize > pte_size)
goto out_put;
+ if (pte_size > psize)
+ hpa |= hva & (pte_size - psize);
/* Check WIMG vs. the actual page we're accessing */
if (!hpte_cache_flags_ok(r, is_ci)) {
@@ -647,14 +643,13 @@
}
/*
- * Set the HPTE to point to pfn.
- * Since the pfn is at PAGE_SIZE granularity, make sure we
+ * Set the HPTE to point to hpa.
+ * Since the hpa is at PAGE_SIZE granularity, make sure we
* don't mask out lower-order bits if psize < PAGE_SIZE.
*/
if (psize < PAGE_SIZE)
psize = PAGE_SIZE;
- r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
- ((pfn << PAGE_SHIFT) & ~(psize - 1));
+ r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa;
if (hpte_is_writable(r) && !write_ok)
r = hpte_make_readonly(r);
ret = RESUME_GUEST;
@@ -719,20 +714,13 @@
asm volatile("ptesync" : : : "memory");
preempt_enable();
if (page && hpte_is_writable(r))
- SetPageDirty(page);
+ set_page_dirty_lock(page);
out_put:
trace_kvm_page_fault_exit(vcpu, hpte, ret);
- if (page) {
- /*
- * We drop pages[0] here, not page because page might
- * have been set to the head page of a compound, but
- * we have to drop the reference on the correct tail
- * page to match the get inside gup()
- */
- put_page(pages[0]);
- }
+ if (page)
+ put_page(page);
return ret;
out_unlock:
@@ -2000,7 +1988,7 @@
ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
if (ret < 0) {
kfree(ctx);
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
return ret;
}
@@ -2149,9 +2137,8 @@
void kvmppc_mmu_debugfs_init(struct kvm *kvm)
{
- kvm->arch.htab_dentry = debugfs_create_file("htab", 0400,
- kvm->arch.debugfs_dir, kvm,
- &debugfs_htab_fops);
+ debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm,
+ &debugfs_htab_fops);
}
void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
@@ -2161,7 +2148,6 @@
vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */
mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 9d73448..04028f9 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -11,14 +11,16 @@
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/debugfs.h>
+#include <linux/pgtable.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
+#include <asm/ultravisor.h>
+#include <asm/kvm_book3s_uvmem.h>
/*
* Supported radix tree geometry.
@@ -31,7 +33,7 @@
gva_t eaddr, void *to, void *from,
unsigned long n)
{
- int uninitialized_var(old_pid), old_lpid;
+ int old_pid, old_lpid;
unsigned long quadrant, ret = n;
bool is_load = !!to;
@@ -64,9 +66,9 @@
pagefault_disable();
if (is_load)
- ret = raw_copy_from_user(to, from, n);
+ ret = __copy_from_user_inatomic(to, (const void __user *)from, n);
else
- ret = raw_copy_to_user(to, from, n);
+ ret = __copy_to_user_inatomic((void __user *)to, from, n);
pagefault_enable();
/* switch the pid first to avoid running host with unallocated pid */
@@ -161,7 +163,9 @@
return -EINVAL;
/* Read the entry from guest memory */
addr = base + (index * sizeof(rpte));
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
if (ret) {
if (pte_ret_p)
*pte_ret_p = addr;
@@ -237,7 +241,9 @@
/* Read the table to find the root of the radix tree */
ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
if (ret)
return ret;
@@ -343,7 +349,7 @@
return __radix_pte_update(ptep, clr, set);
}
-void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
+static void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
radix__set_pte_at(kvm->mm, addr, ptep, pte, 0);
@@ -430,15 +436,19 @@
* Callers are responsible for flushing the PWC.
*
* When page tables are being unmapped/freed as part of page fault path
- * (full == false), ptes are not expected. There is code to unmap them
- * and emit a warning if encountered, but there may already be data
- * corruption due to the unexpected mappings.
+ * (full == false), valid ptes are generally not expected; however, there
+ * is one situation where they arise, which is when dirty page logging is
+ * turned off for a memslot while the VM is running. The new memslot
+ * becomes visible to page faults before the memslot commit function
+ * gets to flush the memslot, which can lead to a 2MB page mapping being
+ * installed for a guest physical address where there are already 64kB
+ * (or 4kB) mappings (of sub-pages of the same 2MB page).
*/
static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full,
unsigned int lpid)
{
if (full) {
- memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
+ memset(pte, 0, sizeof(long) << RADIX_PTE_INDEX_SIZE);
} else {
pte_t *p = pte;
unsigned long it;
@@ -446,7 +456,6 @@
for (it = 0; it < PTRS_PER_PTE; ++it, ++p) {
if (pte_val(*p) == 0)
continue;
- WARN_ON_ONCE(1);
kvmppc_unmap_pte(kvm, p,
pte_pfn(*p) << PAGE_SHIFT,
PAGE_SHIFT, NULL, lpid);
@@ -512,13 +521,14 @@
unsigned long ig;
for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
+ p4d_t *p4d = p4d_offset(pgd, 0);
pud_t *pud;
- if (!pgd_present(*pgd))
+ if (!p4d_present(*p4d))
continue;
- pud = pud_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
kvmppc_unmap_free_pud(kvm, pud, lpid);
- pgd_clear(pgd);
+ p4d_clear(p4d);
}
}
@@ -579,6 +589,7 @@
unsigned long *rmapp, struct rmap_nested **n_rmap)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud, *new_pud = NULL;
pmd_t *pmd, *new_pmd = NULL;
pte_t *ptep, *new_ptep = NULL;
@@ -586,9 +597,11 @@
/* Traverse the guest's 2nd-level tree, allocate new levels needed */
pgd = pgtable + pgd_index(gpa);
+ p4d = p4d_offset(pgd, gpa);
+
pud = NULL;
- if (pgd_present(*pgd))
- pud = pud_offset(pgd, gpa);
+ if (p4d_present(*p4d))
+ pud = pud_offset(p4d, gpa);
else
new_pud = pud_alloc_one(kvm->mm, gpa);
@@ -609,13 +622,13 @@
/* Now traverse again under the lock and change the tree */
ret = -ENOMEM;
- if (pgd_none(*pgd)) {
+ if (p4d_none(*p4d)) {
if (!new_pud)
goto out_unlock;
- pgd_populate(kvm->mm, pgd, new_pud);
+ p4d_populate(kvm->mm, p4d, new_pud);
new_pud = NULL;
}
- pud = pud_offset(pgd, gpa);
+ pud = pud_offset(p4d, gpa);
if (pud_is_leaf(*pud)) {
unsigned long hgpa = gpa & PUD_MASK;
@@ -748,7 +761,7 @@
return ret;
}
-bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing,
+bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing,
unsigned long gpa, unsigned int lpid)
{
unsigned long pgflags;
@@ -763,12 +776,12 @@
pgflags = _PAGE_ACCESSED;
if (writing)
pgflags |= _PAGE_DIRTY;
- /*
- * We are walking the secondary (partition-scoped) page table here.
- * We can do this without disabling irq because the Linux MM
- * subsystem doesn't do THP splits and collapses on this tree.
- */
- ptep = __find_linux_pte(pgtable, gpa, NULL, &shift);
+
+ if (nested)
+ ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
+ else
+ ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+
if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) {
kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift);
return true;
@@ -804,7 +817,7 @@
* is that the page is writable.
*/
hva = gfn_to_hva_memslot(memslot, gfn);
- if (!kvm_ro && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
+ if (!kvm_ro && get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
upgrade_write = true;
} else {
unsigned long pfn;
@@ -826,20 +839,21 @@
* Read the PTE from the process' radix tree and use that
* so we get the shift and attribute bits.
*/
- local_irq_disable();
- ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+ spin_lock(&kvm->mmu_lock);
+ ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
+ pte = __pte(0);
+ if (ptep)
+ pte = READ_ONCE(*ptep);
+ spin_unlock(&kvm->mmu_lock);
/*
* If the PTE disappeared temporarily due to a THP
* collapse, just return and let the guest try again.
*/
- if (!ptep) {
- local_irq_enable();
+ if (!pte_present(pte)) {
if (page)
put_page(page);
return RESUME_GUEST;
}
- pte = *ptep;
- local_irq_enable();
/* If we're logging dirty pages, always map single pages */
large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES);
@@ -899,7 +913,7 @@
return ret;
}
-int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr)
{
struct kvm *kvm = vcpu->kvm;
@@ -928,6 +942,9 @@
if (!(dsisr & DSISR_PRTABLE_FAULT))
gpa |= ea & 0xfff;
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return kvmppc_send_page_to_uv(kvm, gfn);
+
/* Get the corresponding memslot */
memslot = gfn_to_memslot(kvm, gfn);
@@ -942,7 +959,7 @@
kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
return RESUME_GUEST;
}
- return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing);
+ return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
}
if (memslot->flags & KVM_MEM_READONLY) {
@@ -958,8 +975,8 @@
/* Failed to set the reference/change bits */
if (dsisr & DSISR_SET_RC) {
spin_lock(&kvm->mmu_lock);
- if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable,
- writing, gpa, kvm->arch.lpid))
+ if (kvmppc_hv_handle_set_rc(kvm, false, writing,
+ gpa, kvm->arch.lpid))
dsisr &= ~DSISR_SET_RC;
spin_unlock(&kvm->mmu_lock);
@@ -985,11 +1002,16 @@
unsigned long gpa = gfn << PAGE_SHIFT;
unsigned int shift;
- ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
+ uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
+ return 0;
+ }
+
+ ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
kvm->arch.lpid);
- return 0;
+ return 0;
}
/* Called with kvm->mmu_lock held */
@@ -1002,7 +1024,10 @@
int ref = 0;
unsigned long old, *rmapp;
- ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return ref;
+
+ ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
gpa, shift);
@@ -1026,7 +1051,10 @@
unsigned int shift;
int ref = 0;
- ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return ref;
+
+ ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep))
ref = 1;
return ref;
@@ -1038,17 +1066,43 @@
{
unsigned long gfn = memslot->base_gfn + pagenum;
unsigned long gpa = gfn << PAGE_SHIFT;
- pte_t *ptep;
+ pte_t *ptep, pte;
unsigned int shift;
int ret = 0;
unsigned long old, *rmapp;
- ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
- if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
- ret = 1;
- if (shift)
- ret = 1 << (shift - PAGE_SHIFT);
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return ret;
+
+ /*
+ * For performance reasons we don't hold kvm->mmu_lock while walking the
+ * partition scoped table.
+ */
+ ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift);
+ if (!ptep)
+ return 0;
+
+ pte = READ_ONCE(*ptep);
+ if (pte_present(pte) && pte_dirty(pte)) {
spin_lock(&kvm->mmu_lock);
+ /*
+ * Recheck the pte again
+ */
+ if (pte_val(pte) != pte_val(*ptep)) {
+ /*
+ * We have KVM_MEM_LOG_DIRTY_PAGES enabled. Hence we can
+ * only find PAGE_SIZE pte entries here. We can continue
+ * to use the pte addr returned by above page table
+ * walk.
+ */
+ if (!pte_present(*ptep) || !pte_dirty(*ptep)) {
+ spin_unlock(&kvm->mmu_lock);
+ return 0;
+ }
+ }
+
+ ret = 1;
+ VM_BUG_ON(shift);
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
gpa, shift);
kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
@@ -1095,10 +1149,16 @@
unsigned long gpa;
unsigned int shift;
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
+ kvmppc_uvmem_drop_pages(memslot, kvm, true);
+
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return;
+
gpa = memslot->base_gfn << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
for (n = memslot->npages; n; --n) {
- ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
kvm->arch.lpid);
@@ -1214,7 +1274,8 @@
unsigned long gpa;
pgd_t *pgt;
struct kvm_nested_guest *nested;
- pgd_t pgd, *pgdp;
+ pgd_t *pgdp;
+ p4d_t p4d, *p4dp;
pud_t pud, *pudp;
pmd_t pmd, *pmdp;
pte_t *ptep;
@@ -1287,13 +1348,14 @@
}
pgdp = pgt + pgd_index(gpa);
- pgd = READ_ONCE(*pgdp);
- if (!(pgd_val(pgd) & _PAGE_PRESENT)) {
- gpa = (gpa & PGDIR_MASK) + PGDIR_SIZE;
+ p4dp = p4d_offset(pgdp, gpa);
+ p4d = READ_ONCE(*p4dp);
+ if (!(p4d_val(p4d) & _PAGE_PRESENT)) {
+ gpa = (gpa & P4D_MASK) + P4D_SIZE;
continue;
}
- pudp = pud_offset(&pgd, gpa);
+ pudp = pud_offset(&p4d, gpa);
pud = READ_ONCE(*pudp);
if (!(pud_val(pud) & _PAGE_PRESENT)) {
gpa = (gpa & PUD_MASK) + PUD_SIZE;
@@ -1371,9 +1433,8 @@
void kvmhv_radix_debugfs_init(struct kvm *kvm)
{
- kvm->arch.radix_dentry = debugfs_create_file("radix", 0400,
- kvm->arch.debugfs_dir, kvm,
- &debugfs_radix_fops);
+ debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm,
+ &debugfs_radix_fops);
}
int kvmppc_radix_init(void)
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 03b9474..8da93fd 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -27,7 +27,6 @@
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
-#include <asm/kvm_host.h>
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
@@ -262,10 +261,11 @@
}
}
+ account_locked_vm(kvm->mm,
+ kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
+
kvm_put_kvm(stt->kvm);
- account_locked_vm(current->mm,
- kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
call_rcu(&stt->rcu, release_spapr_tce_table);
return 0;
@@ -281,15 +281,16 @@
{
struct kvmppc_spapr_tce_table *stt = NULL;
struct kvmppc_spapr_tce_table *siter;
+ struct mm_struct *mm = kvm->mm;
unsigned long npages, size = args->size;
- int ret = -ENOMEM;
+ int ret;
if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
return -EINVAL;
npages = kvmppc_tce_pages(size);
- ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true);
+ ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true);
if (ret)
return ret;
@@ -326,7 +327,7 @@
if (ret >= 0)
list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
else
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
mutex_unlock(&kvm->lock);
@@ -335,7 +336,7 @@
kfree(stt);
fail_acct:
- account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
+ account_locked_vm(mm, kvmppc_stt_pages(npages), false);
return ret;
}
@@ -488,7 +489,7 @@
return ret;
}
-long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+static long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
unsigned long entry, unsigned long ua,
enum dma_data_direction dir)
{
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 35fd67b..e5ba96c 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -24,7 +24,6 @@
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
-#include <asm/kvm_host.h>
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
@@ -33,7 +32,7 @@
#ifdef CONFIG_BUG
#define WARN_ON_ONCE_RM(condition) ({ \
- static bool __section(.data.unlikely) __warned; \
+ static bool __section(".data.unlikely") __warned; \
int __ret_warn_once = !!(condition); \
\
if (unlikely(__ret_warn_once && !__warned)) { \
@@ -75,8 +74,8 @@
EXPORT_SYMBOL_GPL(kvmppc_find_table);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce,
- unsigned long *ua, unsigned long **prmap)
+static long kvmppc_rm_tce_to_ua(struct kvm *kvm,
+ unsigned long tce, unsigned long *ua)
{
unsigned long gfn = tce >> PAGE_SHIFT;
struct kvm_memory_slot *memslot;
@@ -88,9 +87,6 @@
*ua = __gfn_to_hva_memslot(memslot, gfn) |
(tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
- if (prmap)
- *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
-
return 0;
}
@@ -117,7 +113,7 @@
if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_PARAMETER;
- if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua, NULL))
+ if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua))
return H_TOO_HARD;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -212,7 +208,7 @@
idx = (ioba >> stt->page_shift) - stt->offset;
sttpage = idx / TCES_PER_PAGE;
- sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
+ sttpages = ALIGN(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
TCES_PER_PAGE;
for (i = sttpage; i < sttpage + sttpages; ++i)
if (!stt->pages[i])
@@ -244,7 +240,7 @@
return ret;
}
-extern void iommu_tce_kill_rm(struct iommu_table *tbl,
+static void iommu_tce_kill_rm(struct iommu_table *tbl,
unsigned long entry, unsigned long pages)
{
if (tbl->it_ops->tce_kill)
@@ -415,7 +411,7 @@
return ret;
dir = iommu_tce_direction(tce);
- if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
+ if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua))
return H_PARAMETER;
entry = ioba >> stt->page_shift;
@@ -441,8 +437,8 @@
return H_SUCCESS;
}
-static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
- unsigned long ua, unsigned long *phpa)
+static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
+ unsigned long ua, unsigned long *phpa)
{
pte_t *ptep, pte;
unsigned shift = 0;
@@ -456,10 +452,17 @@
* to exit which will agains result in the below page table walk
* to finish.
*/
- ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift);
- if (!ptep || !pte_present(*ptep))
+ /* an rmap lock won't make it safe. because that just ensure hash
+ * page table entries are removed with rmap lock held. After that
+ * mmu notifier returns and we go ahead and removing ptes from Qemu page table.
+ */
+ ptep = find_kvm_host_pte(vcpu->kvm, mmu_seq, ua, &shift);
+ if (!ptep)
return -ENXIO;
- pte = *ptep;
+
+ pte = READ_ONCE(*ptep);
+ if (!pte_present(pte))
+ return -ENXIO;
if (!shift)
shift = PAGE_SHIFT;
@@ -481,10 +484,11 @@
unsigned long liobn, unsigned long ioba,
unsigned long tce_list, unsigned long npages)
{
+ struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
long i, ret = H_SUCCESS;
unsigned long tces, entry, ua = 0;
- unsigned long *rmap = NULL;
+ unsigned long mmu_seq;
bool prereg = false;
struct kvmppc_spapr_tce_iommu_table *stit;
@@ -492,6 +496,12 @@
if (kvm_is_radix(vcpu->kvm))
return H_TOO_HARD;
+ /*
+ * used to check for invalidations in progress
+ */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -519,7 +529,7 @@
*/
struct mm_iommu_table_group_mem_t *mem;
- if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua))
return H_TOO_HARD;
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
@@ -535,23 +545,11 @@
* We do not require memory to be preregistered in this case
* so lock rmap and do __find_linux_pte_or_hugepte().
*/
- if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua))
return H_TOO_HARD;
- rmap = (void *) vmalloc_to_phys(rmap);
- if (WARN_ON_ONCE_RM(!rmap))
- return H_TOO_HARD;
-
- /*
- * Synchronize with the MMU notifier callbacks in
- * book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.).
- * While we have the rmap lock, code running on other CPUs
- * cannot finish unmapping the host real page that backs
- * this guest real page, so we are OK to access the host
- * real page.
- */
- lock_rmap(rmap);
- if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
+ arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
+ if (kvmppc_rm_ua_to_hpa(vcpu, mmu_seq, ua, &tces)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
@@ -569,7 +567,7 @@
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ua = 0;
- if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) {
ret = H_PARAMETER;
goto invalidate_exit;
}
@@ -594,9 +592,8 @@
iommu_tce_kill_rm(stit->tbl, entry, npages);
unlock_exit:
- if (rmap)
- unlock_rmap(rmap);
-
+ if (!prereg)
+ arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
return ret;
}
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index dad71d2..0effd48 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -235,7 +235,7 @@
#endif
-int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
@@ -371,13 +371,13 @@
if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE)
break;
- run->papr_hcall.nr = cmd;
+ vcpu->run->papr_hcall.nr = cmd;
for (i = 0; i < 9; ++i) {
ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
- run->papr_hcall.args[i] = gpr;
+ vcpu->run->papr_hcall.args[i] = gpr;
}
- run->exit_reason = KVM_EXIT_PAPR_HCALL;
+ vcpu->run->exit_reason = KVM_EXIT_PAPR_HCALL;
vcpu->arch.hcall_needed = 1;
emulated = EMULATE_EXIT_USER;
break;
@@ -629,7 +629,7 @@
}
if (emulated == EMULATE_FAIL)
- emulated = kvmppc_emulate_paired_single(run, vcpu);
+ emulated = kvmppc_emulate_paired_single(vcpu);
return emulated;
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6c99ccc..527c205 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -73,6 +73,9 @@
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/hw_breakpoint.h>
+#include <asm/kvm_book3s_uvmem.h>
+#include <asm/ultravisor.h>
+#include <asm/dtl.h>
#include "book3s.h"
@@ -109,7 +112,7 @@
MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
#ifdef CONFIG_KVM_XICS
-static struct kernel_param_ops module_param_ops = {
+static const struct kernel_param_ops module_param_ops = {
.set = param_set_int,
.get = param_get_int,
};
@@ -134,7 +137,6 @@
/* If set, the threads on each CPU core have to be in the same MMU mode */
static bool no_mixing_hpt_and_radix;
-static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
/*
@@ -230,13 +232,11 @@
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
int cpu;
- struct swait_queue_head *wqp;
+ struct rcuwait *waitp;
- wqp = kvm_arch_vcpu_wq(vcpu);
- if (swq_has_sleeper(wqp)) {
- swake_up_one(wqp);
+ waitp = kvm_arch_vcpu_get_wait(vcpu);
+ if (rcuwait_wake_up(waitp))
++vcpu->stat.halt_wakeup;
- }
cpu = READ_ONCE(vcpu->arch.thread_cpu);
if (cpu >= 0 && kvmppc_ipi_thread(cpu))
@@ -339,25 +339,13 @@
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}
-static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
-{
- /*
- * Check for illegal transactional state bit combination
- * and if we find it, force the TS field to a safe state.
- */
- if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
- msr &= ~MSR_TS_MASK;
- vcpu->arch.shregs.msr = msr;
- kvmppc_end_cede(vcpu);
-}
-
static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
{
vcpu->arch.pvr = pvr;
}
/* Dummy value used in computing PCR value below */
-#define PCR_ARCH_300 (PCR_ARCH_207 << 1)
+#define PCR_ARCH_31 (PCR_ARCH_300 << 1)
static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
@@ -365,7 +353,9 @@
struct kvmppc_vcore *vc = vcpu->arch.vcore;
/* We can (emulate) our own architecture version and anything older */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ host_pcr_bit = PCR_ARCH_31;
+ else if (cpu_has_feature(CPU_FTR_ARCH_300))
host_pcr_bit = PCR_ARCH_300;
else if (cpu_has_feature(CPU_FTR_ARCH_207S))
host_pcr_bit = PCR_ARCH_207;
@@ -391,6 +381,9 @@
case PVR_ARCH_300:
guest_pcr_bit = PCR_ARCH_300;
break;
+ case PVR_ARCH_31:
+ guest_pcr_bit = PCR_ARCH_31;
+ break;
default:
return -EINVAL;
}
@@ -781,7 +774,7 @@
return H_P3;
vcpu->arch.ciabr = value1;
return H_SUCCESS;
- case H_SET_MODE_RESOURCE_SET_DAWR:
+ case H_SET_MODE_RESOURCE_SET_DAWR0:
if (!kvmppc_power8_compatible(vcpu))
return H_P2;
if (!ppc_breakpoint_available())
@@ -793,6 +786,11 @@
vcpu->arch.dawr = value1;
vcpu->arch.dawrx = value2;
return H_SUCCESS;
+ case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
+ /* KVM does not support mflags=2 (AIL=2) */
+ if (mflags != 0 && mflags != 3)
+ return H_UNSUPPORTED_FLAG_START;
+ return H_TOO_HARD;
default:
return H_TOO_HARD;
}
@@ -1079,6 +1077,43 @@
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
break;
+ case H_SVM_PAGE_IN:
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_page_in(vcpu->kvm,
+ kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ break;
+ case H_SVM_PAGE_OUT:
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_page_out(vcpu->kvm,
+ kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ break;
+ case H_SVM_INIT_START:
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_init_start(vcpu->kvm);
+ break;
+ case H_SVM_INIT_DONE:
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_init_done(vcpu->kvm);
+ break;
+ case H_SVM_INIT_ABORT:
+ /*
+ * Even if that call is made by the Ultravisor, the SSR1 value
+ * is the guest context one, with the secure bit clear as it has
+ * not yet been secured. So we can't check it here.
+ * Instead the kvm->arch.secure_guest flag is checked inside
+ * kvmppc_h_svm_init_abort().
+ */
+ ret = kvmppc_h_svm_init_abort(vcpu->kvm);
+ break;
+
default:
return RESUME_HOST;
}
@@ -1131,8 +1166,7 @@
return kvmppc_hcall_impl_hv_realmode(cmd);
}
-static int kvmppc_emulate_debug_inst(struct kvm_run *run,
- struct kvm_vcpu *vcpu)
+static int kvmppc_emulate_debug_inst(struct kvm_vcpu *vcpu)
{
u32 last_inst;
@@ -1146,8 +1180,8 @@
}
if (last_inst == KVMPPC_INST_SW_BREAKPOINT) {
- run->exit_reason = KVM_EXIT_DEBUG;
- run->debug.arch.address = kvmppc_get_pc(vcpu);
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+ vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu);
return RESUME_HOST;
} else {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
@@ -1248,9 +1282,10 @@
return RESUME_GUEST;
}
-static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
+ struct kvm_run *run = vcpu->run;
int r = RESUME_HOST;
vcpu->stat.sum_exits++;
@@ -1385,7 +1420,7 @@
swab32(vcpu->arch.emul_inst) :
vcpu->arch.emul_inst;
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
- r = kvmppc_emulate_debug_inst(run, vcpu);
+ r = kvmppc_emulate_debug_inst(vcpu);
} else {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
@@ -1437,7 +1472,7 @@
return r;
}
-static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
{
int r;
int srcu_idx;
@@ -1495,7 +1530,7 @@
*/
case BOOK3S_INTERRUPT_H_DATA_STORAGE:
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvmhv_nested_page_fault(run, vcpu);
+ r = kvmhv_nested_page_fault(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
break;
case BOOK3S_INTERRUPT_H_INST_STORAGE:
@@ -1505,7 +1540,7 @@
if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvmhv_nested_page_fault(run, vcpu);
+ r = kvmhv_nested_page_fault(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
break;
@@ -1654,10 +1689,22 @@
case KVM_REG_PPC_UAMOR:
*val = get_reg_val(id, vcpu->arch.uamor);
break;
- case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
+ case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1:
i = id - KVM_REG_PPC_MMCR0;
*val = get_reg_val(id, vcpu->arch.mmcr[i]);
break;
+ case KVM_REG_PPC_MMCR2:
+ *val = get_reg_val(id, vcpu->arch.mmcr[2]);
+ break;
+ case KVM_REG_PPC_MMCRA:
+ *val = get_reg_val(id, vcpu->arch.mmcra);
+ break;
+ case KVM_REG_PPC_MMCRS:
+ *val = get_reg_val(id, vcpu->arch.mmcrs);
+ break;
+ case KVM_REG_PPC_MMCR3:
+ *val = get_reg_val(id, vcpu->arch.mmcr[3]);
+ break;
case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
i = id - KVM_REG_PPC_PMC1;
*val = get_reg_val(id, vcpu->arch.pmc[i]);
@@ -1673,7 +1720,13 @@
*val = get_reg_val(id, vcpu->arch.sdar);
break;
case KVM_REG_PPC_SIER:
- *val = get_reg_val(id, vcpu->arch.sier);
+ *val = get_reg_val(id, vcpu->arch.sier[0]);
+ break;
+ case KVM_REG_PPC_SIER2:
+ *val = get_reg_val(id, vcpu->arch.sier[1]);
+ break;
+ case KVM_REG_PPC_SIER3:
+ *val = get_reg_val(id, vcpu->arch.sier[2]);
break;
case KVM_REG_PPC_IAMR:
*val = get_reg_val(id, vcpu->arch.iamr);
@@ -1875,10 +1928,22 @@
case KVM_REG_PPC_UAMOR:
vcpu->arch.uamor = set_reg_val(id, *val);
break;
- case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
+ case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1:
i = id - KVM_REG_PPC_MMCR0;
vcpu->arch.mmcr[i] = set_reg_val(id, *val);
break;
+ case KVM_REG_PPC_MMCR2:
+ vcpu->arch.mmcr[2] = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MMCRA:
+ vcpu->arch.mmcra = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MMCRS:
+ vcpu->arch.mmcrs = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MMCR3:
+ *val = get_reg_val(id, vcpu->arch.mmcr[3]);
+ break;
case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
i = id - KVM_REG_PPC_PMC1;
vcpu->arch.pmc[i] = set_reg_val(id, *val);
@@ -1894,7 +1959,13 @@
vcpu->arch.sdar = set_reg_val(id, *val);
break;
case KVM_REG_PPC_SIER:
- vcpu->arch.sier = set_reg_val(id, *val);
+ vcpu->arch.sier[0] = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_SIER2:
+ vcpu->arch.sier[1] = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_SIER3:
+ vcpu->arch.sier[2] = set_reg_val(id, *val);
break;
case KVM_REG_PPC_IAMR:
vcpu->arch.iamr = set_reg_val(id, *val);
@@ -2100,7 +2171,7 @@
spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
- init_swait_queue_head(&vcore->wq);
+ rcuwait_init(&vcore->wait);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = id;
@@ -2242,14 +2313,9 @@
struct kvm *kvm = vcpu->kvm;
snprintf(buf, sizeof(buf), "vcpu%u", id);
- if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
- return;
vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
- if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir))
- return;
- vcpu->arch.debugfs_timings =
- debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir,
- vcpu, &debugfs_timings_ops);
+ debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu,
+ &debugfs_timings_ops);
}
#else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
@@ -2258,22 +2324,16 @@
}
#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
-static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu;
int err;
int core;
struct kvmppc_vcore *vcore;
+ struct kvm *kvm;
+ unsigned int id;
- err = -ENOMEM;
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu)
- goto out;
-
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
+ kvm = vcpu->kvm;
+ id = vcpu->vcpu_id;
vcpu->arch.shared = &vcpu->arch.shregs;
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -2304,7 +2364,7 @@
* to trap and then we emulate them.
*/
vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
- HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
+ HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
if (cpu_has_feature(CPU_FTR_HVMODE)) {
vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -2357,7 +2417,7 @@
mutex_unlock(&kvm->lock);
if (!vcore)
- goto uninit_vcpu;
+ return err;
spin_lock(&vcore->lock);
++vcore->num_threads;
@@ -2372,14 +2432,7 @@
debugfs_vcpu_init(vcpu, id);
- return vcpu;
-
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
- return ERR_PTR(err);
+ return 0;
}
static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
@@ -2433,8 +2486,6 @@
unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
spin_unlock(&vcpu->arch.vpa_update_lock);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
@@ -2459,15 +2510,6 @@
vcpu->arch.timer_running = 1;
}
-static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.ceded = 0;
- if (vcpu->arch.timer_running) {
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
- vcpu->arch.timer_running = 0;
- }
-}
-
extern int __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
@@ -2941,7 +2983,7 @@
ret = RESUME_GUEST;
if (vcpu->arch.trap)
- ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
+ ret = kvmppc_handle_exit_hv(vcpu,
vcpu->arch.run_task);
vcpu->arch.ret = ret;
@@ -3399,14 +3441,24 @@
int trap;
unsigned long host_hfscr = mfspr(SPRN_HFSCR);
unsigned long host_ciabr = mfspr(SPRN_CIABR);
- unsigned long host_dawr = mfspr(SPRN_DAWR);
- unsigned long host_dawrx = mfspr(SPRN_DAWRX);
+ unsigned long host_dawr = mfspr(SPRN_DAWR0);
+ unsigned long host_dawrx = mfspr(SPRN_DAWRX0);
unsigned long host_psscr = mfspr(SPRN_PSSCR);
unsigned long host_pidr = mfspr(SPRN_PID);
+ /*
+ * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0,
+ * so set HDICE before writing HDEC.
+ */
+ mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr | LPCR_HDICE);
+ isync();
+
hdec = time_limit - mftb();
- if (hdec < 0)
+ if (hdec < 0) {
+ mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
+ isync();
return BOOK3S_INTERRUPT_HV_DECREMENTER;
+ }
mtspr(SPRN_HDEC, hdec);
if (vc->tb_offset) {
@@ -3429,8 +3481,8 @@
mtspr(SPRN_SPURR, vcpu->arch.spurr);
if (dawr_enabled()) {
- mtspr(SPRN_DAWR, vcpu->arch.dawr);
- mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
+ mtspr(SPRN_DAWR0, vcpu->arch.dawr);
+ mtspr(SPRN_DAWRX0, vcpu->arch.dawrx);
}
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
mtspr(SPRN_IC, vcpu->arch.ic);
@@ -3482,8 +3534,8 @@
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, host_hfscr);
mtspr(SPRN_CIABR, host_ciabr);
- mtspr(SPRN_DAWR, host_dawr);
- mtspr(SPRN_DAWRX, host_dawrx);
+ mtspr(SPRN_DAWR0, host_dawr);
+ mtspr(SPRN_DAWRX0, host_dawrx);
mtspr(SPRN_PID, host_pidr);
/*
@@ -3492,6 +3544,13 @@
*/
asm volatile("eieio; tlbsync; ptesync");
+ /*
+ * cp_abort is required if the processor supports local copy-paste
+ * to clear the copy buffer that was under control of the guest.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ asm volatile(PPC_CP_ABORT);
+
mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */
isync();
@@ -3520,7 +3579,7 @@
* Virtual-mode guest entry for POWER9 and later when the host and
* guest are both using the radix MMU. The LPIDR has already been set.
*/
-int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
+static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -3528,13 +3587,14 @@
unsigned long host_tidr = mfspr(SPRN_TIDR);
unsigned long host_iamr = mfspr(SPRN_IAMR);
unsigned long host_amr = mfspr(SPRN_AMR);
+ unsigned long host_fscr = mfspr(SPRN_FSCR);
s64 dec;
u64 tb;
int trap, save_pmu;
dec = mfspr(SPRN_DEC);
tb = mftb();
- if (dec < 512)
+ if (dec < 0)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
local_paca->kvm_hstate.dec_expires = dec + tb;
if (local_paca->kvm_hstate.dec_expires < time_limit)
@@ -3683,6 +3743,9 @@
if (host_amr != vcpu->arch.amr)
mtspr(SPRN_AMR, host_amr);
+ if (host_fscr != vcpu->arch.fscr)
+ mtspr(SPRN_FSCR, host_fscr);
+
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
store_fp_state(&vcpu->arch.fp);
#ifdef CONFIG_ALTIVEC
@@ -3813,7 +3876,6 @@
ktime_t cur, start_poll, start_wait;
int do_sleep = 1;
u64 block_ns;
- DECLARE_SWAITQUEUE(wait);
/* Poll for pending exceptions and ceded state */
cur = start_poll = ktime_get();
@@ -3841,10 +3903,10 @@
}
}
- prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
-
+ prepare_to_rcuwait(&vc->wait);
+ set_current_state(TASK_INTERRUPTIBLE);
if (kvmppc_vcore_check_block(vc)) {
- finish_swait(&vc->wq, &wait);
+ finish_rcuwait(&vc->wait);
do_sleep = 0;
/* If we polled, count this as a successful poll */
if (vc->halt_poll_ns)
@@ -3858,7 +3920,7 @@
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
schedule();
- finish_swait(&vc->wq, &wait);
+ finish_rcuwait(&vc->wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
@@ -3929,15 +3991,16 @@
return r;
}
-static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *run = vcpu->run;
int n_ceded, i, r;
struct kvmppc_vcore *vc;
struct kvm_vcpu *v;
trace_kvmppc_run_vcpu_enter(vcpu);
- kvm_run->exit_reason = 0;
+ run->exit_reason = 0;
vcpu->arch.ret = RESUME_GUEST;
vcpu->arch.trap = 0;
kvmppc_update_vpas(vcpu);
@@ -3949,7 +4012,6 @@
spin_lock(&vc->lock);
vcpu->arch.ceded = 0;
vcpu->arch.run_task = current;
- vcpu->arch.kvm_run = kvm_run;
vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
vcpu->arch.busy_preempt = TB_NIL;
@@ -3969,7 +4031,7 @@
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- swake_up_one(&vc->wq);
+ rcuwait_wake_up(&vc->wait);
}
}
@@ -3982,8 +4044,8 @@
r = kvmhv_setup_mmu(vcpu);
spin_lock(&vc->lock);
if (r) {
- kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
- kvm_run->fail_entry.
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.
hardware_entry_failure_reason = 0;
vcpu->arch.ret = r;
break;
@@ -4002,7 +4064,7 @@
if (signal_pending(v->arch.run_task)) {
kvmppc_remove_runnable(vc, v);
v->stat.signal_exits++;
- v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
+ v->run->exit_reason = KVM_EXIT_INTR;
v->arch.ret = -EINTR;
wake_up(&v->arch.cpu_run);
}
@@ -4043,7 +4105,7 @@
if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
kvmppc_remove_runnable(vc, vcpu);
vcpu->stat.signal_exits++;
- kvm_run->exit_reason = KVM_EXIT_INTR;
+ run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
}
@@ -4054,15 +4116,15 @@
wake_up(&v->arch.cpu_run);
}
- trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
+ trace_kvmppc_run_vcpu_exit(vcpu);
spin_unlock(&vc->lock);
return vcpu->arch.ret;
}
-int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
- struct kvm_vcpu *vcpu, u64 time_limit,
+int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
{
+ struct kvm_run *run = vcpu->run;
int trap, r, pcpu;
int srcu_idx, lpid;
struct kvmppc_vcore *vc;
@@ -4071,14 +4133,13 @@
trace_kvmppc_run_vcpu_enter(vcpu);
- kvm_run->exit_reason = 0;
+ run->exit_reason = 0;
vcpu->arch.ret = RESUME_GUEST;
vcpu->arch.trap = 0;
vc = vcpu->arch.vcore;
vcpu->arch.ceded = 0;
vcpu->arch.run_task = current;
- vcpu->arch.kvm_run = kvm_run;
vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
vcpu->arch.busy_preempt = TB_NIL;
@@ -4196,9 +4257,9 @@
r = RESUME_GUEST;
if (trap) {
if (!nested)
- r = kvmppc_handle_exit_hv(kvm_run, vcpu, current);
+ r = kvmppc_handle_exit_hv(vcpu, current);
else
- r = kvmppc_handle_nested_exit(kvm_run, vcpu);
+ r = kvmppc_handle_nested_exit(vcpu);
}
vcpu->arch.ret = r;
@@ -4208,7 +4269,7 @@
while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) {
if (signal_pending(current)) {
vcpu->stat.signal_exits++;
- kvm_run->exit_reason = KVM_EXIT_INTR;
+ run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
break;
}
@@ -4224,13 +4285,13 @@
done:
kvmppc_remove_runnable(vc, vcpu);
- trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
+ trace_kvmppc_run_vcpu_exit(vcpu);
return vcpu->arch.ret;
sigpend:
vcpu->stat.signal_exits++;
- kvm_run->exit_reason = KVM_EXIT_INTR;
+ run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
out:
local_irq_enable();
@@ -4238,8 +4299,9 @@
goto done;
}
-static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *run = vcpu->run;
int r;
int srcu_idx;
unsigned long ebb_regs[3] = {}; /* shut up GCC */
@@ -4308,8 +4370,8 @@
}
user_vrsave = mfspr(SPRN_VRSAVE);
- vcpu->arch.wqp = &vcpu->arch.vcore->wq;
- vcpu->arch.pgdir = current->mm->pgd;
+ vcpu->arch.waitp = &vcpu->arch.vcore->wait;
+ vcpu->arch.pgdir = kvm->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
do {
@@ -4323,10 +4385,10 @@
*/
if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
!no_mixing_hpt_and_radix)
- r = kvmhv_run_single_vcpu(run, vcpu, ~(u64)0,
+ r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
vcpu->arch.vcore->lpcr);
else
- r = kvmppc_run_vcpu(run, vcpu);
+ r = kvmppc_run_vcpu(vcpu);
if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
!(vcpu->arch.shregs.msr & MSR_PR)) {
@@ -4336,7 +4398,7 @@
kvmppc_core_prepare_to_enter(vcpu);
} else if (r == RESUME_PAGE_FAULT) {
srcu_idx = srcu_read_lock(&kvm->srcu);
- r = kvmppc_book3s_hv_page_fault(run, vcpu,
+ r = kvmppc_book3s_hv_page_fault(vcpu,
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
srcu_read_unlock(&kvm->srcu, srcu_idx);
} else if (r == RESUME_PASSTHROUGH) {
@@ -4434,7 +4496,7 @@
slots = kvm_memslots(kvm);
memslot = id_to_memslot(slots, log->slot);
r = -ENOENT;
- if (!memslot->dirty_bitmap)
+ if (!memslot || !memslot->dirty_bitmap)
goto out;
/*
@@ -4481,29 +4543,30 @@
return r;
}
-static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *slot)
{
- if (!dont || free->arch.rmap != dont->arch.rmap) {
- vfree(free->arch.rmap);
- free->arch.rmap = NULL;
- }
-}
-
-static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- slot->arch.rmap = vzalloc(array_size(npages, sizeof(*slot->arch.rmap)));
- if (!slot->arch.rmap)
- return -ENOMEM;
-
- return 0;
+ vfree(slot->arch.rmap);
+ slot->arch.rmap = NULL;
}
static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem)
+ struct kvm_memory_slot *slot,
+ const struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
{
+ unsigned long npages = mem->memory_size >> PAGE_SHIFT;
+
+ if (change == KVM_MR_CREATE) {
+ unsigned long size = array_size(npages, sizeof(*slot->arch.rmap));
+
+ if ((size >> PAGE_SHIFT) > totalram_pages())
+ return -ENOMEM;
+
+ slot->arch.rmap = vzalloc(size);
+ if (!slot->arch.rmap)
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -4540,6 +4603,27 @@
if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
kvmppc_radix_flush_memslot(kvm, old);
+ /*
+ * If UV hasn't yet called H_SVM_INIT_START, don't register memslots.
+ */
+ if (!kvm->arch.secure_guest)
+ return;
+
+ switch (change) {
+ case KVM_MR_CREATE:
+ /*
+ * @TODO kvmppc_uvmem_memslot_create() can fail and
+ * return error. Fix this.
+ */
+ kvmppc_uvmem_memslot_create(kvm, new);
+ break;
+ case KVM_MR_DELETE:
+ kvmppc_uvmem_memslot_delete(kvm, old);
+ break;
+ default:
+ /* TODO: Handle KVM_MR_MOVE */
+ break;
+ }
}
/*
@@ -4569,11 +4653,6 @@
}
}
-static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
-{
- return;
-}
-
void kvmppc_setup_partition_table(struct kvm *kvm)
{
unsigned long dw0, dw1;
@@ -4641,14 +4720,14 @@
/* Look up the VMA for the start of this memory slot */
hva = memslot->userspace_addr;
- down_read(¤t->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
+ mmap_read_lock(kvm->mm);
+ vma = find_vma(kvm->mm, hva);
if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
goto up_out;
psize = vma_kernel_pagesize(vma);
- up_read(¤t->mm->mmap_sem);
+ mmap_read_unlock(kvm->mm);
/* We can handle 4k, 64k or 16M pages in the VRMA */
if (psize >= 0x1000000)
@@ -4681,7 +4760,7 @@
return err;
up_out:
- up_read(¤t->mm->mmap_sem);
+ mmap_read_unlock(kvm->mm);
goto out_srcu;
}
@@ -4813,6 +4892,8 @@
char buf[32];
int ret;
+ mutex_init(&kvm->arch.uvmem_lock);
+ INIT_LIST_HEAD(&kvm->arch.uvmem_pfns);
mutex_init(&kvm->arch.mmu_setup_lock);
/* Allocate the guest's logical partition ID */
@@ -4982,15 +5063,18 @@
if (nesting_enabled(kvm))
kvmhv_release_all_nested(kvm);
kvm->arch.process_table = 0;
+ if (kvm->arch.secure_guest)
+ uv_svm_terminate(kvm->arch.lpid);
kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
}
+
kvmppc_free_lpid(kvm->arch.lpid);
kvmppc_free_pimap(kvm);
}
/* We don't need to emulate any privileged instructions or dcbz */
-static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_core_emulate_op_hv(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
return EMULATE_FAIL;
@@ -5429,6 +5513,109 @@
return rc;
}
+static void unpin_vpa_reset(struct kvm *kvm, struct kvmppc_vpa *vpa)
+{
+ unpin_vpa(kvm, vpa);
+ vpa->gpa = 0;
+ vpa->pinned_addr = NULL;
+ vpa->dirty = false;
+ vpa->update_pending = 0;
+}
+
+/*
+ * Enable a guest to become a secure VM, or test whether
+ * that could be enabled.
+ * Called when the KVM_CAP_PPC_SECURE_GUEST capability is
+ * tested (kvm == NULL) or enabled (kvm != NULL).
+ */
+static int kvmhv_enable_svm(struct kvm *kvm)
+{
+ if (!kvmppc_uvmem_available())
+ return -EINVAL;
+ if (kvm)
+ kvm->arch.svm_enabled = 1;
+ return 0;
+}
+
+/*
+ * IOCTL handler to turn off secure mode of guest
+ *
+ * - Release all device pages
+ * - Issue ucall to terminate the guest on the UV side
+ * - Unpin the VPA pages.
+ * - Reinit the partition scoped page tables
+ */
+static int kvmhv_svm_off(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int mmu_was_ready;
+ int srcu_idx;
+ int ret = 0;
+ int i;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return ret;
+
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+ mmu_was_ready = kvm->arch.mmu_ready;
+ if (kvm->arch.mmu_ready) {
+ kvm->arch.mmu_ready = 0;
+ /* order mmu_ready vs. vcpus_running */
+ smp_mb();
+ if (atomic_read(&kvm->arch.vcpus_running)) {
+ kvm->arch.mmu_ready = 1;
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ struct kvm_memory_slot *memslot;
+ struct kvm_memslots *slots = __kvm_memslots(kvm, i);
+
+ if (!slots)
+ continue;
+
+ kvm_for_each_memslot(memslot, slots) {
+ kvmppc_uvmem_drop_pages(memslot, kvm, true);
+ uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
+ }
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ ret = uv_svm_terminate(kvm->arch.lpid);
+ if (ret != U_SUCCESS) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * When secure guest is reset, all the guest pages are sent
+ * to UV via UV_PAGE_IN before the non-boot vcpus get a
+ * chance to run and unpin their VPA pages. Unpinning of all
+ * VPA pages is done here explicitly so that VPA pages
+ * can be migrated to the secure side.
+ *
+ * This is required to for the secure SMP guest to reboot
+ * correctly.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ unpin_vpa_reset(kvm, &vcpu->arch.dtl);
+ unpin_vpa_reset(kvm, &vcpu->arch.slb_shadow);
+ unpin_vpa_reset(kvm, &vcpu->arch.vpa);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ }
+
+ kvmppc_setup_partition_table(kvm);
+ kvm->arch.secure_guest = 0;
+ kvm->arch.mmu_ready = mmu_was_ready;
+out:
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
+ return ret;
+}
+
static struct kvmppc_ops kvm_ops_hv = {
.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -5436,6 +5623,7 @@
.set_one_reg = kvmppc_set_one_reg_hv,
.vcpu_load = kvmppc_core_vcpu_load_hv,
.vcpu_put = kvmppc_core_vcpu_put_hv,
+ .inject_interrupt = kvmppc_inject_interrupt_hv,
.set_msr = kvmppc_set_msr_hv,
.vcpu_run = kvmppc_vcpu_run_hv,
.vcpu_create = kvmppc_core_vcpu_create_hv,
@@ -5449,9 +5637,7 @@
.age_hva = kvm_age_hva_hv,
.test_age_hva = kvm_test_age_hva_hv,
.set_spte_hva = kvm_set_spte_hva_hv,
- .mmu_destroy = kvmppc_mmu_destroy_hv,
.free_memslot = kvmppc_core_free_memslot_hv,
- .create_memslot = kvmppc_core_create_memslot_hv,
.init_vm = kvmppc_core_init_vm_hv,
.destroy_vm = kvmppc_core_destroy_vm_hv,
.get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
@@ -5471,6 +5657,8 @@
.enable_nested = kvmhv_enable_nested,
.load_from_eaddr = kvmhv_load_from_eaddr,
.store_to_eaddr = kvmhv_store_to_eaddr,
+ .enable_svm = kvmhv_enable_svm,
+ .svm_off = kvmhv_svm_off,
};
static int kvm_init_subcore_bitmap(void)
@@ -5579,11 +5767,16 @@
no_mixing_hpt_and_radix = true;
}
+ r = kvmppc_uvmem_init();
+ if (r < 0)
+ pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
+
return r;
}
static void kvmppc_book3s_exit_hv(void)
{
+ kvmppc_uvmem_free();
kvmppc_free_host_rm_ops();
if (kvmppc_radix_possible())
kvmppc_radix_exit();
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 4a91b54..121fca2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -95,25 +95,17 @@
void __init kvm_cma_reserve(void)
{
unsigned long align_size;
- struct memblock_region *reg;
- phys_addr_t selected_size = 0;
+ phys_addr_t selected_size;
/*
* We need CMA reservation only when we are in HV mode
*/
if (!cpu_has_feature(CPU_FTR_HVMODE))
return;
- /*
- * We cannot use memblock_phys_mem_size() here, because
- * memblock_analyze() has not been called yet.
- */
- for_each_memblock(memory, reg)
- selected_size += memblock_region_memory_end_pfn(reg) -
- memblock_region_memory_base_pfn(reg);
- selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
+ selected_size = PAGE_ALIGN(memblock_phys_mem_size() * kvm_cma_resv_ratio / 100);
if (selected_size) {
- pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+ pr_info("%s: reserving %ld MiB for global area\n", __func__,
(unsigned long)selected_size / SZ_1M);
align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
cma_declare_contiguous(0, selected_size, 0, align_size,
@@ -755,6 +747,71 @@
local_paca->kvm_hstate.kvm_split_mode = NULL;
}
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.ceded = 0;
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
+}
+
+void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
+{
+ /*
+ * Check for illegal transactional state bit combination
+ * and if we find it, force the TS field to a safe state.
+ */
+ if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
+ msr &= ~MSR_TS_MASK;
+ vcpu->arch.shregs.msr = msr;
+ kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv);
+
+static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+ unsigned long msr, pc, new_msr, new_pc;
+
+ msr = kvmppc_get_msr(vcpu);
+ pc = kvmppc_get_pc(vcpu);
+ new_msr = vcpu->arch.intr_msr;
+ new_pc = vec;
+
+ /* If transactional, change to suspend mode on IRQ delivery */
+ if (MSR_TM_TRANSACTIONAL(msr))
+ new_msr |= MSR_TS_S;
+ else
+ new_msr |= msr & MSR_TS_MASK;
+
+ /*
+ * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and
+ * applicable. AIL=2 is not supported.
+ *
+ * AIL does not apply to SRESET, MCE, or HMI (which is never
+ * delivered to the guest), and does not apply if IR=0 or DR=0.
+ */
+ if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET &&
+ vec != BOOK3S_INTERRUPT_MACHINE_CHECK &&
+ (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 &&
+ (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR) ) {
+ new_msr |= MSR_IR | MSR_DR;
+ new_pc += 0xC000000000004000ULL;
+ }
+
+ kvmppc_set_srr0(vcpu, pc);
+ kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
+ kvmppc_set_pc(vcpu, new_pc);
+ vcpu->arch.shregs.msr = new_msr;
+}
+
+void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+ inject_interrupt(vcpu, vec, srr1_flags);
+ kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv);
+
/*
* Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
* Can we inject a Decrementer or a External interrupt?
@@ -762,7 +819,6 @@
void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
{
int ext;
- unsigned long vec = 0;
unsigned long lpcr;
/* Insert EXTERNAL bit into LPCR at the MER bit position */
@@ -774,26 +830,16 @@
if (vcpu->arch.shregs.msr & MSR_EE) {
if (ext) {
- vec = BOOK3S_INTERRUPT_EXTERNAL;
+ inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0);
} else {
long int dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD))
dec = (int) dec;
if (dec < 0)
- vec = BOOK3S_INTERRUPT_DECREMENTER;
+ inject_interrupt(vcpu,
+ BOOK3S_INTERRUPT_DECREMENTER, 0);
}
}
- if (vec) {
- unsigned long msr, old_msr = vcpu->arch.shregs.msr;
-
- kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
- kvmppc_set_srr1(vcpu, old_msr);
- kvmppc_set_pc(vcpu, vec);
- msr = vcpu->arch.intr_msr;
- if (MSR_TM_ACTIVE(old_msr))
- msr |= MSR_TS_S;
- vcpu->arch.shregs.msr = msr;
- }
if (vcpu->arch.doorbell_request) {
mtspr(SPRN_DPDES, 1);
@@ -821,6 +867,7 @@
"r" (0) : "memory");
}
asm volatile("ptesync": : :"memory");
+ // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
} else {
for (set = 0; set < kvm->arch.tlb_sets; ++set) {
@@ -831,7 +878,9 @@
rb += PPC_BIT(51); /* increment set number */
}
asm volatile("ptesync": : :"memory");
- asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+ // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
}
}
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 63fd81f..327417d 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -58,13 +58,16 @@
/*
* Put whatever is in the decrementer into the
* hypervisor decrementer.
+ * Because of a hardware deviation in P8 and P9,
+ * we need to set LPCR[HDICE] before writing HDEC.
*/
-BEGIN_FTR_SECTION
ld r5, HSTATE_KVM_VCORE(r13)
ld r6, VCORE_KVM(r5)
ld r9, KVM_HOST_LPCR(r6)
- andis. r9, r9, LPCR_LD@h
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ ori r8, r9, LPCR_HDICE
+ mtspr SPRN_LPCR, r8
+ isync
+ andis. r0, r9, LPCR_LD@h
mfspr r8,SPRN_DEC
mftb r7
BEGIN_FTR_SECTION
@@ -140,6 +143,14 @@
std r8, HSTATE_MMCR2(r13)
std r9, HSTATE_SIER(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_MMCR3
+ mfspr r6, SPRN_SIER2
+ mfspr r7, SPRN_SIER3
+ std r5, HSTATE_MMCR3(r13)
+ std r6, HSTATE_SIER2(r13)
+ std r7, HSTATE_SIER3(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
mfspr r3, SPRN_PMC1
mfspr r5, SPRN_PMC2
mfspr r6, SPRN_PMC3
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 9906d20..d0b6c8c 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -11,11 +11,11 @@
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/llist.h>
+#include <linux/pgtable.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
#include <asm/reg.h>
@@ -237,20 +237,21 @@
/* copy parameters in */
hv_ptr = kvmppc_get_gpr(vcpu, 4);
+ regs_ptr = kvmppc_get_gpr(vcpu, 5);
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
- sizeof(struct hv_guest_state));
+ sizeof(struct hv_guest_state)) ||
+ kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
+ sizeof(struct pt_regs));
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (err)
return H_PARAMETER;
+
if (kvmppc_need_byteswap(vcpu))
byteswap_hv_regs(&l2_hv);
if (l2_hv.version != HV_GUEST_STATE_VERSION)
return H_P2;
- regs_ptr = kvmppc_get_gpr(vcpu, 5);
- err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
- sizeof(struct pt_regs));
- if (err)
- return H_PARAMETER;
if (kvmppc_need_byteswap(vcpu))
byteswap_pt_regs(&l2_regs);
if (l2_hv.vcpu_token >= NR_CPUS)
@@ -311,8 +312,7 @@
r = RESUME_HOST;
break;
}
- r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu, hdec_exp,
- lpcr);
+ r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
} while (is_kvmppc_resume_guest(r));
/* save L2 state for return */
@@ -345,12 +345,12 @@
byteswap_hv_regs(&l2_hv);
byteswap_pt_regs(&l2_regs);
}
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
- sizeof(struct hv_guest_state));
- if (err)
- return H_AUTHORITY;
- err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
+ sizeof(struct hv_guest_state)) ||
+ kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
sizeof(struct pt_regs));
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (err)
return H_AUTHORITY;
@@ -510,7 +510,7 @@
if (eaddr & (0xFFFUL << 52))
return H_PARAMETER;
- buf = kzalloc(n, GFP_KERNEL);
+ buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN);
if (!buf)
return H_NO_MEM;
@@ -530,12 +530,16 @@
goto not_found;
/* Write what was loaded into our buffer back to the L1 guest */
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (rc)
goto not_found;
} else {
/* Load the data to be stored from the L1 guest into our buf */
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (rc)
goto not_found;
@@ -570,9 +574,12 @@
ret = -EFAULT;
ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
- if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
+ if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) {
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
ret = kvm_read_guest(kvm, ptbl_addr,
&ptbl_entry, sizeof(ptbl_entry));
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ }
if (ret) {
gp->l1_gr_to_hr = 0;
gp->process_table = 0;
@@ -583,7 +590,7 @@
kvmhv_set_nested_ptbl(gp);
}
-struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
+static struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
{
struct kvm_nested_guest *gp;
long shadow_lpid;
@@ -771,6 +778,23 @@
return kvm->arch.nested_guests[lpid];
}
+pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid,
+ unsigned long ea, unsigned *hshift)
+{
+ struct kvm_nested_guest *gp;
+ pte_t *pte;
+
+ gp = kvmhv_find_nested(kvm, lpid);
+ if (!gp)
+ return NULL;
+
+ VM_WARN(!spin_is_locked(&kvm->mmu_lock),
+ "%s called with kvm mmu_lock not held \n", __func__);
+ pte = __find_linux_pte(gp->shadow_pgtable, ea, NULL, hshift);
+
+ return pte;
+}
+
static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
{
return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
@@ -813,19 +837,15 @@
unsigned long clr, unsigned long set,
unsigned long hpa, unsigned long mask)
{
- struct kvm_nested_guest *gp;
unsigned long gpa;
unsigned int shift, lpid;
pte_t *ptep;
gpa = n_rmap & RMAP_NESTED_GPA_MASK;
lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
- gp = kvmhv_find_nested(kvm, lpid);
- if (!gp)
- return;
/* Find the pte */
- ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
+ ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
/*
* If the pte is present and the pfn is still the same, update the pte.
* If the pfn has changed then this is a stale rmap entry, the nested
@@ -875,7 +895,7 @@
return;
/* Find and invalidate the pte */
- ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
+ ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
/* Don't spuriously invalidate ptes if the pfn has changed */
if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
@@ -942,7 +962,7 @@
int shift;
spin_lock(&kvm->mmu_lock);
- ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
+ ptep = find_kvm_nested_guest_pte(kvm, gp->l1_lpid, gpa, &shift);
if (!shift)
shift = PAGE_SHIFT;
if (ptep && pte_present(*ptep)) {
@@ -1190,7 +1210,7 @@
} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
/* Can we execute? */
if (!gpte_p->may_execute) {
- flags |= SRR1_ISI_N_OR_G;
+ flags |= SRR1_ISI_N_G_OR_CIP;
goto forward_to_l1;
}
} else {
@@ -1207,7 +1227,7 @@
forward_to_l1:
vcpu->arch.fault_dsisr = flags;
if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
- vcpu->arch.shregs.msr &= ~0x783f0000ul;
+ vcpu->arch.shregs.msr &= SRR1_MSR_BITS;
vcpu->arch.shregs.msr |= flags;
}
return RESUME_HOST;
@@ -1233,16 +1253,16 @@
spin_lock(&kvm->mmu_lock);
/* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
- ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing,
- gpte.raddr, kvm->arch.lpid);
+ ret = kvmppc_hv_handle_set_rc(kvm, false, writing,
+ gpte.raddr, kvm->arch.lpid);
if (!ret) {
ret = -EINVAL;
goto out_unlock;
}
/* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
- ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa,
- gp->shadow_lpid);
+ ret = kvmppc_hv_handle_set_rc(kvm, true, writing,
+ n_gpa, gp->l1_lpid);
if (!ret)
ret = -EINVAL;
else
@@ -1278,8 +1298,7 @@
}
/* called with gp->tlb_lock held */
-static long int __kvmhv_nested_page_fault(struct kvm_run *run,
- struct kvm_vcpu *vcpu,
+static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
struct kvm_nested_guest *gp)
{
struct kvm *kvm = vcpu->kvm;
@@ -1362,7 +1381,7 @@
}
/* passthrough of emulated MMIO case */
- return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing);
+ return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
}
if (memslot->flags & KVM_MEM_READONLY) {
if (writing) {
@@ -1383,7 +1402,7 @@
/* See if can find translation in our partition scoped tables for L1 */
pte = __pte(0);
spin_lock(&kvm->mmu_lock);
- pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ pte_p = find_kvm_secondary_pte(kvm, gpa, &shift);
if (!shift)
shift = PAGE_SHIFT;
if (pte_p)
@@ -1437,8 +1456,7 @@
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
- if (n_rmap)
- kfree(n_rmap);
+ kfree(n_rmap);
if (ret == -EAGAIN)
ret = RESUME_GUEST; /* Let the guest try again */
@@ -1449,13 +1467,13 @@
return RESUME_GUEST;
}
-long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu)
+long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
{
struct kvm_nested_guest *gp = vcpu->arch.nested;
long int ret;
mutex_lock(&gp->tlb_lock);
- ret = __kvmhv_nested_page_fault(run, vcpu, gp);
+ ret = __kvmhv_nested_page_fault(vcpu, gp);
mutex_unlock(&gp->tlb_lock);
return ret;
}
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 79f7d07..6028628 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -244,7 +244,7 @@
{
bool resync_req;
- __this_cpu_inc(irq_stat.hmi_exceptions);
+ local_paca->hmi_irqs++;
if (hmi_handle_debugtrig(NULL) >= 0)
return 1;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 9bf3be4..3ddc83d 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -210,7 +210,7 @@
pte_t *ptep;
unsigned int writing;
unsigned long mmu_seq;
- unsigned long rcbits, irq_flags = 0;
+ unsigned long rcbits;
if (kvm_is_radix(kvm))
return H_FUNCTION;
@@ -248,17 +248,9 @@
/* Translate to host virtual address */
hva = __gfn_to_hva_memslot(memslot, gfn);
- /*
- * If we had a page table table change after lookup, we would
- * retry via mmu_notifier_retry.
- */
- if (!realmode)
- local_irq_save(irq_flags);
- /*
- * If called in real mode we have MSR_EE = 0. Otherwise
- * we disable irq above.
- */
- ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift);
+
+ arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
+ ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift);
if (ptep) {
pte_t pte;
unsigned int host_pte_size;
@@ -272,8 +264,7 @@
* to <= host page size, if host is using hugepage
*/
if (host_pte_size < psize) {
- if (!realmode)
- local_irq_restore(flags);
+ arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
return H_PARAMETER;
}
pte = kvmppc_read_update_linux_pte(ptep, writing);
@@ -287,8 +278,7 @@
pa |= gpa & ~PAGE_MASK;
}
}
- if (!realmode)
- local_irq_restore(irq_flags);
+ arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
ptel |= pa;
@@ -888,8 +878,8 @@
return ret;
}
-static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa,
- int writing, unsigned long *hpa,
+static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
+ unsigned long gpa, int writing, unsigned long *hpa,
struct kvm_memory_slot **memslot_p)
{
struct kvm *kvm = vcpu->kvm;
@@ -908,7 +898,7 @@
hva = __gfn_to_hva_memslot(memslot, gfn);
/* Try to find the host pte for that virtual address */
- ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+ ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
if (!ptep)
return H_TOO_HARD;
pte = kvmppc_read_update_linux_pte(ptep, writing);
@@ -943,16 +933,11 @@
mmu_seq = kvm->mmu_notifier_seq;
smp_rmb();
- ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot);
- if (ret != H_SUCCESS)
- return ret;
+ arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
- /* Check if we've been invalidated */
- raw_spin_lock(&kvm->mmu_lock.rlock);
- if (mmu_notifier_retry(kvm, mmu_seq)) {
- ret = H_TOO_HARD;
+ ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot);
+ if (ret != H_SUCCESS)
goto out_unlock;
- }
/* Zero the page */
for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES)
@@ -960,7 +945,7 @@
kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
out_unlock:
- raw_spin_unlock(&kvm->mmu_lock.rlock);
+ arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
return ret;
}
@@ -976,19 +961,14 @@
mmu_seq = kvm->mmu_notifier_seq;
smp_rmb();
- ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot);
+ arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
+ ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot);
if (ret != H_SUCCESS)
- return ret;
- ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL);
- if (ret != H_SUCCESS)
- return ret;
-
- /* Check if we've been invalidated */
- raw_spin_lock(&kvm->mmu_lock.rlock);
- if (mmu_notifier_retry(kvm, mmu_seq)) {
- ret = H_TOO_HARD;
goto out_unlock;
- }
+
+ ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL);
+ if (ret != H_SUCCESS)
+ goto out_unlock;
/* Copy the page */
memcpy((void *)dest_pa, (void *)src_pa, SZ_4K);
@@ -996,7 +976,7 @@
kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
out_unlock:
- raw_spin_unlock(&kvm->mmu_lock.rlock);
+ arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
return ret;
}
@@ -1260,7 +1240,7 @@
status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */
if (!data) {
if (gr & (HPTE_R_N | HPTE_R_G))
- return status | SRR1_ISI_N_OR_G;
+ return status | SRR1_ISI_N_G_OR_CIP;
if (!hpte_read_permission(pp, slb_v & key))
return status | SRR1_ISI_PROT;
} else if (status & DSISR_ISSTORE) {
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 287d591..c2c9c73 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -8,6 +8,7 @@
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/kernel_stat.h>
+#include <linux/pgtable.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
@@ -15,7 +16,6 @@
#include <asm/xics.h>
#include <asm/synch.h>
#include <asm/cputhreads.h>
-#include <asm/pgtable.h>
#include <asm/ppc-opcode.h>
#include <asm/pnv-pci.h>
#include <asm/opal.h>
@@ -764,7 +764,7 @@
return ics_rm_eoi(vcpu, irq);
}
-unsigned long eoi_rc;
+static unsigned long eoi_rc;
static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
{
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c
index 174d75e..6f18632 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xive.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xive.c
@@ -3,6 +3,7 @@
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/kernel_stat.h>
+#include <linux/pgtable.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
@@ -11,7 +12,6 @@
#include <asm/debug.h>
#include <asm/synch.h>
#include <asm/cputhreads.h>
-#include <asm/pgtable.h>
#include <asm/ppc-opcode.h>
#include <asm/pnv-pci.h>
#include <asm/opal.h>
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index feaf6ca..b1d9aff 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -292,13 +292,16 @@
* r3 contains the SRR1 wakeup value, SRR1 is trashed.
*/
_GLOBAL(idle_kvm_start_guest)
- ld r4,PACAEMERGSP(r13)
mfcr r5
mflr r0
- std r1,0(r4)
- std r5,8(r4)
- std r0,16(r4)
- subi r1,r4,STACK_FRAME_OVERHEAD
+ std r5, 8(r1) // Save CR in caller's frame
+ std r0, 16(r1) // Save LR in caller's frame
+ // Create frame on emergency stack
+ ld r4, PACAEMERGSP(r13)
+ stdu r1, -SWITCH_FRAME_SIZE(r4)
+ // Switch to new frame on emergency stack
+ mr r1, r4
+ std r3, 32(r1) // Save SRR1 wakeup value
SAVE_NVGPRS(r1)
/*
@@ -350,6 +353,10 @@
kvm_secondary_got_guest:
+ // About to go to guest, clear saved SRR1
+ li r0, 0
+ std r0, 32(r1)
+
/* Set HSTATE_DSCR(r13) to something sensible */
ld r6, PACA_DSCR_DEFAULT(r13)
std r6, HSTATE_DSCR(r13)
@@ -441,13 +448,12 @@
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
- /* set up r3 for return */
- mfspr r3,SPRN_SRR1
+ // Return SRR1 wakeup value, or 0 if we went into the guest
+ ld r3, 32(r1)
REST_NVGPRS(r1)
- addi r1, r1, STACK_FRAME_OVERHEAD
- ld r0, 16(r1)
- ld r5, 8(r1)
- ld r1, 0(r1)
+ ld r1, 0(r1) // Switch back to caller stack
+ ld r0, 16(r1) // Reload LR
+ ld r5, 8(r1) // Reload CR
mtlr r0
mtcr r5
blr
@@ -707,8 +713,8 @@
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
mfspr r5, SPRN_CIABR
- mfspr r6, SPRN_DAWR
- mfspr r7, SPRN_DAWRX
+ mfspr r6, SPRN_DAWR0
+ mfspr r7, SPRN_DAWRX0
mfspr r8, SPRN_IAMR
std r5, STACK_SLOT_CIABR(r1)
std r6, STACK_SLOT_DAWR(r1)
@@ -803,8 +809,8 @@
beq 1f
ld r5, VCPU_DAWR(r4)
ld r6, VCPU_DAWRX(r4)
- mtspr SPRN_DAWR, r5
- mtspr SPRN_DAWRX, r6
+ mtspr SPRN_DAWR0, r5
+ mtspr SPRN_DAWRX0, r6
1:
ld r7, VCPU_CIABR(r4)
ld r8, VCPU_TAR(r4)
@@ -1266,7 +1272,6 @@
* R12 = (guest CR << 32) | interrupt vector
* R13 = PACA
* guest R12 saved in shadow VCPU SCRATCH0
- * guest CTR saved in shadow VCPU SCRATCH1 if RELOCATABLE
* guest R13 saved in SPRN_SCRATCH0
*/
std r9, HSTATE_SCRATCH2(r13)
@@ -1367,12 +1372,7 @@
11: stw r3,VCPU_HEIR(r9)
/* these are volatile across C function calls */
-#ifdef CONFIG_RELOCATABLE
- ld r3, HSTATE_SCRATCH1(r13)
- mtctr r3
-#else
mfctr r3
-#endif
mfxer r4
std r3, VCPU_CTR(r9)
std r4, VCPU_XER(r9)
@@ -1772,8 +1772,8 @@
* If the DAWR doesn't work, it's ok to write these here as
* this value should always be zero
*/
- mtspr SPRN_DAWR, r6
- mtspr SPRN_DAWRX, r7
+ mtspr SPRN_DAWR0, r6
+ mtspr SPRN_DAWRX0, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
@@ -1801,6 +1801,7 @@
tlbsync
ptesync
+BEGIN_FTR_SECTION
/* Radix: Handle the case where the guest used an illegal PID */
LOAD_REG_ADDR(r4, mmu_base_pid)
lwz r3, VCPU_GUEST_PID(r9)
@@ -1830,11 +1831,20 @@
addi r7,r7,0x1000
bdnz 1b
ptesync
+END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
2:
#endif /* CONFIG_PPC_RADIX_MMU */
/*
+ * cp_abort is required if the processor supports local copy-paste
+ * to clear the copy buffer that was under control of the guest.
+ */
+BEGIN_FTR_SECTION
+ PPC_CP_ABORT
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
+
+ /*
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
* have to coordinate the hardware threads.
@@ -2529,7 +2539,7 @@
.globl hcall_real_table_end
hcall_real_table_end:
-_GLOBAL(kvmppc_h_set_xdabr)
+_GLOBAL_TOC(kvmppc_h_set_xdabr)
EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
andi. r0, r5, DABRX_USER | DABRX_KERNEL
beq 6f
@@ -2539,7 +2549,7 @@
6: li r3, H_PARAMETER
blr
-_GLOBAL(kvmppc_h_set_dabr)
+_GLOBAL_TOC(kvmppc_h_set_dabr)
EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr)
li r5, DABRX_USER | DABRX_KERNEL
3:
@@ -2581,8 +2591,8 @@
mfmsr r6
andi. r6, r6, MSR_DR /* in real mode? */
bne 4f
- mtspr SPRN_DAWR, r4
- mtspr SPRN_DAWRX, r5
+ mtspr SPRN_DAWR0, r4
+ mtspr SPRN_DAWRX0, r5
4: li r3, 0
blr
@@ -2911,6 +2921,11 @@
beq 4f
li r0, 0
stb r0, VCPU_CEDED(r9)
+ /*
+ * The escalation interrupts are special as we don't EOI them.
+ * There is no need to use the load-after-store ordering offset
+ * to set PQ to 10 as we won't use StoreEOI.
+ */
li r6, XIVE_ESB_SET_PQ_10
b 5f
4: li r0, 1
@@ -3288,7 +3303,6 @@
* r12 is (CR << 32) | vector
* r13 points to our PACA
* r12 is saved in HSTATE_SCRATCH0(r13)
- * ctr is saved in HSTATE_SCRATCH1(r13) if RELOCATABLE
* r9 is saved in HSTATE_SCRATCH2(r13)
* r13 is saved in HSPRG1
* cfar is saved in HSTATE_CFAR(r13)
@@ -3337,11 +3351,7 @@
ld r5, HSTATE_CFAR(r13)
std r5, ORIG_GPR3(r1)
mflr r3
-#ifdef CONFIG_RELOCATABLE
- ld r4, HSTATE_SCRATCH1(r13)
-#else
mfctr r4
-#endif
mfxer r5
lbz r6, PACAIRQSOFTMASK(r13)
std r3, _LINK(r1)
@@ -3370,7 +3380,7 @@
mtspr SPRN_AMR, r0
mtspr SPRN_IAMR, r0
mtspr SPRN_CIABR, r0
- mtspr SPRN_DAWRX, r0
+ mtspr SPRN_DAWRX0, r0
BEGIN_MMU_FTR_SECTION
b 4f
@@ -3464,7 +3474,7 @@
mtspr SPRN_PMC6, r9
ld r3, VCPU_MMCR(r4)
ld r5, VCPU_MMCR + 8(r4)
- ld r6, VCPU_MMCR + 16(r4)
+ ld r6, VCPU_MMCRA(r4)
ld r7, VCPU_SIAR(r4)
ld r8, VCPU_SDAR(r4)
mtspr SPRN_MMCR1, r5
@@ -3472,14 +3482,22 @@
mtspr SPRN_SIAR, r7
mtspr SPRN_SDAR, r8
BEGIN_FTR_SECTION
- ld r5, VCPU_MMCR + 24(r4)
+ ld r5, VCPU_MMCR + 24(r4)
+ ld r6, VCPU_SIER + 8(r4)
+ ld r7, VCPU_SIER + 16(r4)
+ mtspr SPRN_MMCR3, r5
+ mtspr SPRN_SIER2, r6
+ mtspr SPRN_SIER3, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
+BEGIN_FTR_SECTION
+ ld r5, VCPU_MMCR + 16(r4)
ld r6, VCPU_SIER(r4)
mtspr SPRN_MMCR2, r5
mtspr SPRN_SIER, r6
BEGIN_FTR_SECTION_NESTED(96)
lwz r7, VCPU_PMC + 24(r4)
lwz r8, VCPU_PMC + 28(r4)
- ld r9, VCPU_MMCR + 32(r4)
+ ld r9, VCPU_MMCRS(r4)
mtspr SPRN_SPMC1, r7
mtspr SPRN_SPMC2, r8
mtspr SPRN_MMCRS, r9
@@ -3532,6 +3550,14 @@
mtspr SPRN_MMCR2, r8
mtspr SPRN_SIER, r9
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
+ ld r5, HSTATE_MMCR3(r13)
+ ld r6, HSTATE_SIER2(r13)
+ ld r7, HSTATE_SIER3(r13)
+ mtspr SPRN_MMCR3, r5
+ mtspr SPRN_SIER2, r6
+ mtspr SPRN_SIER3, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
mtspr SPRN_MMCR0, r3
isync
mtlr r0
@@ -3587,10 +3613,18 @@
mfspr r8, SPRN_SDAR
std r4, VCPU_MMCR(r9)
std r5, VCPU_MMCR + 8(r9)
- std r6, VCPU_MMCR + 16(r9)
+ std r6, VCPU_MMCRA(r9)
BEGIN_FTR_SECTION
- std r10, VCPU_MMCR + 24(r9)
+ std r10, VCPU_MMCR + 16(r9)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_MMCR3
+ mfspr r6, SPRN_SIER2
+ mfspr r7, SPRN_SIER3
+ std r5, VCPU_MMCR + 24(r9)
+ std r6, VCPU_SIER + 8(r9)
+ std r7, VCPU_SIER + 16(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
std r7, VCPU_SIAR(r9)
std r8, VCPU_SDAR(r9)
mfspr r3, SPRN_PMC1
@@ -3614,7 +3648,7 @@
mfspr r8, SPRN_MMCRS
stw r6, VCPU_PMC + 24(r9)
stw r7, VCPU_PMC + 28(r9)
- std r8, VCPU_MMCR + 32(r9)
+ std r8, VCPU_MMCRS(r9)
lis r4, 0x8000
mtspr SPRN_MMCRS, r4
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
new file mode 100644
index 0000000..84e5a2d
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -0,0 +1,1213 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Secure pages management: Migration of pages between normal and secure
+ * memory of KVM guests.
+ *
+ * Copyright 2018 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com>
+ */
+
+/*
+ * A pseries guest can be run as secure guest on Ultravisor-enabled
+ * POWER platforms. On such platforms, this driver will be used to manage
+ * the movement of guest pages between the normal memory managed by
+ * hypervisor (HV) and secure memory managed by Ultravisor (UV).
+ *
+ * The page-in or page-out requests from UV will come to HV as hcalls and
+ * HV will call back into UV via ultracalls to satisfy these page requests.
+ *
+ * Private ZONE_DEVICE memory equal to the amount of secure memory
+ * available in the platform for running secure guests is hotplugged.
+ * Whenever a page belonging to the guest becomes secure, a page from this
+ * private device memory is used to represent and track that secure page
+ * on the HV side. Some pages (like virtio buffers, VPA pages etc) are
+ * shared between UV and HV. However such pages aren't represented by
+ * device private memory and mappings to shared memory exist in both
+ * UV and HV page tables.
+ */
+
+/*
+ * Notes on locking
+ *
+ * kvm->arch.uvmem_lock is a per-guest lock that prevents concurrent
+ * page-in and page-out requests for the same GPA. Concurrent accesses
+ * can either come via UV (guest vCPUs requesting for same page)
+ * or when HV and guest simultaneously access the same page.
+ * This mutex serializes the migration of page from HV(normal) to
+ * UV(secure) and vice versa. So the serialization points are around
+ * migrate_vma routines and page-in/out routines.
+ *
+ * Per-guest mutex comes with a cost though. Mainly it serializes the
+ * fault path as page-out can occur when HV faults on accessing secure
+ * guest pages. Currently UV issues page-in requests for all the guest
+ * PFNs one at a time during early boot (UV_ESM uvcall), so this is
+ * not a cause for concern. Also currently the number of page-outs caused
+ * by HV touching secure pages is very very low. If an when UV supports
+ * overcommitting, then we might see concurrent guest driven page-outs.
+ *
+ * Locking order
+ *
+ * 1. kvm->srcu - Protects KVM memslots
+ * 2. kvm->mm->mmap_lock - find_vma, migrate_vma_pages and helpers, ksm_madvise
+ * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting
+ * as sync-points for page-in/out
+ */
+
+/*
+ * Notes on page size
+ *
+ * Currently UV uses 2MB mappings internally, but will issue H_SVM_PAGE_IN
+ * and H_SVM_PAGE_OUT hcalls in PAGE_SIZE(64K) granularity. HV tracks
+ * secure GPAs at 64K page size and maintains one device PFN for each
+ * 64K secure GPA. UV_PAGE_IN and UV_PAGE_OUT calls by HV are also issued
+ * for 64K page at a time.
+ *
+ * HV faulting on secure pages: When HV touches any secure page, it
+ * faults and issues a UV_PAGE_OUT request with 64K page size. Currently
+ * UV splits and remaps the 2MB page if necessary and copies out the
+ * required 64K page contents.
+ *
+ * Shared pages: Whenever guest shares a secure page, UV will split and
+ * remap the 2MB page if required and issue H_SVM_PAGE_IN with 64K page size.
+ *
+ * HV invalidating a page: When a regular page belonging to secure
+ * guest gets unmapped, HV informs UV with UV_PAGE_INVAL of 64K
+ * page size. Using 64K page size is correct here because any non-secure
+ * page will essentially be of 64K page size. Splitting by UV during sharing
+ * and page-out ensures this.
+ *
+ * Page fault handling: When HV handles page fault of a page belonging
+ * to secure guest, it sends that to UV with a 64K UV_PAGE_IN request.
+ * Using 64K size is correct here too as UV would have split the 2MB page
+ * into 64k mappings and would have done page-outs earlier.
+ *
+ * In summary, the current secure pages handling code in HV assumes
+ * 64K page size and in fact fails any page-in/page-out requests of
+ * non-64K size upfront. If and when UV starts supporting multiple
+ * page-sizes, we need to break this assumption.
+ */
+
+#include <linux/pagemap.h>
+#include <linux/migrate.h>
+#include <linux/kvm_host.h>
+#include <linux/ksm.h>
+#include <asm/ultravisor.h>
+#include <asm/mman.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s_uvmem.h>
+
+static struct dev_pagemap kvmppc_uvmem_pgmap;
+static unsigned long *kvmppc_uvmem_bitmap;
+static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
+
+/*
+ * States of a GFN
+ * ---------------
+ * The GFN can be in one of the following states.
+ *
+ * (a) Secure - The GFN is secure. The GFN is associated with
+ * a Secure VM, the contents of the GFN is not accessible
+ * to the Hypervisor. This GFN can be backed by a secure-PFN,
+ * or can be backed by a normal-PFN with contents encrypted.
+ * The former is true when the GFN is paged-in into the
+ * ultravisor. The latter is true when the GFN is paged-out
+ * of the ultravisor.
+ *
+ * (b) Shared - The GFN is shared. The GFN is associated with a
+ * a secure VM. The contents of the GFN is accessible to
+ * Hypervisor. This GFN is backed by a normal-PFN and its
+ * content is un-encrypted.
+ *
+ * (c) Normal - The GFN is a normal. The GFN is associated with
+ * a normal VM. The contents of the GFN is accesible to
+ * the Hypervisor. Its content is never encrypted.
+ *
+ * States of a VM.
+ * ---------------
+ *
+ * Normal VM: A VM whose contents are always accessible to
+ * the hypervisor. All its GFNs are normal-GFNs.
+ *
+ * Secure VM: A VM whose contents are not accessible to the
+ * hypervisor without the VM's consent. Its GFNs are
+ * either Shared-GFN or Secure-GFNs.
+ *
+ * Transient VM: A Normal VM that is transitioning to secure VM.
+ * The transition starts on successful return of
+ * H_SVM_INIT_START, and ends on successful return
+ * of H_SVM_INIT_DONE. This transient VM, can have GFNs
+ * in any of the three states; i.e Secure-GFN, Shared-GFN,
+ * and Normal-GFN. The VM never executes in this state
+ * in supervisor-mode.
+ *
+ * Memory slot State.
+ * -----------------------------
+ * The state of a memory slot mirrors the state of the
+ * VM the memory slot is associated with.
+ *
+ * VM State transition.
+ * --------------------
+ *
+ * A VM always starts in Normal Mode.
+ *
+ * H_SVM_INIT_START moves the VM into transient state. During this
+ * time the Ultravisor may request some of its GFNs to be shared or
+ * secured. So its GFNs can be in one of the three GFN states.
+ *
+ * H_SVM_INIT_DONE moves the VM entirely from transient state to
+ * secure-state. At this point any left-over normal-GFNs are
+ * transitioned to Secure-GFN.
+ *
+ * H_SVM_INIT_ABORT moves the transient VM back to normal VM.
+ * All its GFNs are moved to Normal-GFNs.
+ *
+ * UV_TERMINATE transitions the secure-VM back to normal-VM. All
+ * the secure-GFN and shared-GFNs are tranistioned to normal-GFN
+ * Note: The contents of the normal-GFN is undefined at this point.
+ *
+ * GFN state implementation:
+ * -------------------------
+ *
+ * Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
+ * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
+ * set, and contains the value of the secure-PFN.
+ * It is associated with a normal-PFN; also called mem_pfn, when
+ * the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
+ * The value of the normal-PFN is not tracked.
+ *
+ * Shared GFN is associated with a normal-PFN. Its pfn[] has
+ * KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN
+ * is not tracked.
+ *
+ * Normal GFN is associated with normal-PFN. Its pfn[] has
+ * no flag set. The value of the normal-PFN is not tracked.
+ *
+ * Life cycle of a GFN
+ * --------------------
+ *
+ * --------------------------------------------------------------
+ * | | Share | Unshare | SVM |H_SVM_INIT_DONE|
+ * | |operation |operation | abort/ | |
+ * | | | | terminate | |
+ * -------------------------------------------------------------
+ * | | | | | |
+ * | Secure | Shared | Secure |Normal |Secure |
+ * | | | | | |
+ * | Shared | Shared | Secure |Normal |Shared |
+ * | | | | | |
+ * | Normal | Shared | Secure |Normal |Secure |
+ * --------------------------------------------------------------
+ *
+ * Life cycle of a VM
+ * --------------------
+ *
+ * --------------------------------------------------------------------
+ * | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ |
+ * | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE |
+ * | | | | | | |
+ * --------- ----------------------------------------------------------
+ * | | | | | | |
+ * | Normal | Normal | Transient|Error |Error |Normal |
+ * | | | | | | |
+ * | Secure | Error | Error |Error |Error |Normal |
+ * | | | | | | |
+ * |Transient| N/A | Error |Secure |Normal |Normal |
+ * --------------------------------------------------------------------
+ */
+
+#define KVMPPC_GFN_UVMEM_PFN (1UL << 63)
+#define KVMPPC_GFN_MEM_PFN (1UL << 62)
+#define KVMPPC_GFN_SHARED (1UL << 61)
+#define KVMPPC_GFN_SECURE (KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN)
+#define KVMPPC_GFN_FLAG_MASK (KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED)
+#define KVMPPC_GFN_PFN_MASK (~KVMPPC_GFN_FLAG_MASK)
+
+struct kvmppc_uvmem_slot {
+ struct list_head list;
+ unsigned long nr_pfns;
+ unsigned long base_pfn;
+ unsigned long *pfns;
+};
+struct kvmppc_uvmem_page_pvt {
+ struct kvm *kvm;
+ unsigned long gpa;
+ bool skip_page_out;
+ bool remove_gfn;
+};
+
+bool kvmppc_uvmem_available(void)
+{
+ /*
+ * If kvmppc_uvmem_bitmap != NULL, then there is an ultravisor
+ * and our data structures have been initialized successfully.
+ */
+ return !!kvmppc_uvmem_bitmap;
+}
+
+int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+ struct kvmppc_uvmem_slot *p;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ p->pfns = vzalloc(array_size(slot->npages, sizeof(*p->pfns)));
+ if (!p->pfns) {
+ kfree(p);
+ return -ENOMEM;
+ }
+ p->nr_pfns = slot->npages;
+ p->base_pfn = slot->base_gfn;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ list_add(&p->list, &kvm->arch.uvmem_pfns);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+
+ return 0;
+}
+
+/*
+ * All device PFNs are already released by the time we come here.
+ */
+void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+ struct kvmppc_uvmem_slot *p, *next;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ list_for_each_entry_safe(p, next, &kvm->arch.uvmem_pfns, list) {
+ if (p->base_pfn == slot->base_gfn) {
+ vfree(p->pfns);
+ list_del(&p->list);
+ kfree(p);
+ break;
+ }
+ }
+ mutex_unlock(&kvm->arch.uvmem_lock);
+}
+
+static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm,
+ unsigned long flag, unsigned long uvmem_pfn)
+{
+ struct kvmppc_uvmem_slot *p;
+
+ list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
+ if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
+ unsigned long index = gfn - p->base_pfn;
+
+ if (flag == KVMPPC_GFN_UVMEM_PFN)
+ p->pfns[index] = uvmem_pfn | flag;
+ else
+ p->pfns[index] = flag;
+ return;
+ }
+ }
+}
+
+/* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. */
+static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn,
+ unsigned long uvmem_pfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn);
+}
+
+/* mark the GFN as secure-GFN associated with a memory-PFN. */
+static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0);
+}
+
+/* mark the GFN as a shared GFN. */
+static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0);
+}
+
+/* mark the GFN as a non-existent GFN. */
+static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, 0, 0);
+}
+
+/* return true, if the GFN is a secure-GFN backed by a secure-PFN */
+static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
+ unsigned long *uvmem_pfn)
+{
+ struct kvmppc_uvmem_slot *p;
+
+ list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
+ if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
+ unsigned long index = gfn - p->base_pfn;
+
+ if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) {
+ if (uvmem_pfn)
+ *uvmem_pfn = p->pfns[index] &
+ KVMPPC_GFN_PFN_MASK;
+ return true;
+ } else
+ return false;
+ }
+ }
+ return false;
+}
+
+/*
+ * starting from *gfn search for the next available GFN that is not yet
+ * transitioned to a secure GFN. return the value of that GFN in *gfn. If a
+ * GFN is found, return true, else return false
+ *
+ * Must be called with kvm->arch.uvmem_lock held.
+ */
+static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot,
+ struct kvm *kvm, unsigned long *gfn)
+{
+ struct kvmppc_uvmem_slot *p;
+ bool ret = false;
+ unsigned long i;
+
+ list_for_each_entry(p, &kvm->arch.uvmem_pfns, list)
+ if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns)
+ break;
+ if (!p)
+ return ret;
+ /*
+ * The code below assumes, one to one correspondence between
+ * kvmppc_uvmem_slot and memslot.
+ */
+ for (i = *gfn; i < p->base_pfn + p->nr_pfns; i++) {
+ unsigned long index = i - p->base_pfn;
+
+ if (!(p->pfns[index] & KVMPPC_GFN_FLAG_MASK)) {
+ *gfn = i;
+ ret = true;
+ break;
+ }
+ }
+ return ret;
+}
+
+static int kvmppc_memslot_page_merge(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot, bool merge)
+{
+ unsigned long gfn = memslot->base_gfn;
+ unsigned long end, start = gfn_to_hva(kvm, gfn);
+ int ret = 0;
+ struct vm_area_struct *vma;
+ int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
+
+ if (kvm_is_error_hva(start))
+ return H_STATE;
+
+ end = start + (memslot->npages << PAGE_SHIFT);
+
+ mmap_write_lock(kvm->mm);
+ do {
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma) {
+ ret = H_STATE;
+ break;
+ }
+ ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ merge_flag, &vma->vm_flags);
+ if (ret) {
+ ret = H_STATE;
+ break;
+ }
+ start = vma->vm_end;
+ } while (end > vma->vm_end);
+
+ mmap_write_unlock(kvm->mm);
+ return ret;
+}
+
+static void __kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
+ kvmppc_uvmem_slot_free(kvm, memslot);
+ kvmppc_memslot_page_merge(kvm, memslot, true);
+}
+
+static int __kvmppc_uvmem_memslot_create(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ int ret = H_PARAMETER;
+
+ if (kvmppc_memslot_page_merge(kvm, memslot, false))
+ return ret;
+
+ if (kvmppc_uvmem_slot_init(kvm, memslot))
+ goto out1;
+
+ ret = uv_register_mem_slot(kvm->arch.lpid,
+ memslot->base_gfn << PAGE_SHIFT,
+ memslot->npages * PAGE_SIZE,
+ 0, memslot->id);
+ if (ret < 0) {
+ ret = H_PARAMETER;
+ goto out;
+ }
+ return 0;
+out:
+ kvmppc_uvmem_slot_free(kvm, memslot);
+out1:
+ kvmppc_memslot_page_merge(kvm, memslot, true);
+ return ret;
+}
+
+unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot, *m;
+ int ret = H_SUCCESS;
+ int srcu_idx;
+
+ kvm->arch.secure_guest = KVMPPC_SECURE_INIT_START;
+
+ if (!kvmppc_uvmem_bitmap)
+ return H_UNSUPPORTED;
+
+ /* Only radix guests can be secure guests */
+ if (!kvm_is_radix(kvm))
+ return H_UNSUPPORTED;
+
+ /* NAK the transition to secure if not enabled */
+ if (!kvm->arch.svm_enabled)
+ return H_AUTHORITY;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ /* register the memslot */
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots) {
+ ret = __kvmppc_uvmem_memslot_create(kvm, memslot);
+ if (ret)
+ break;
+ }
+
+ if (ret) {
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(m, slots) {
+ if (m == memslot)
+ break;
+ __kvmppc_uvmem_memslot_delete(kvm, memslot);
+ }
+ }
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+/*
+ * Provision a new page on HV side and copy over the contents
+ * from secure memory using UV_PAGE_OUT uvcall.
+ * Caller must held kvm->arch.uvmem_lock.
+ */
+static int __kvmppc_svm_page_out(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end, unsigned long page_shift,
+ struct kvm *kvm, unsigned long gpa)
+{
+ unsigned long src_pfn, dst_pfn = 0;
+ struct migrate_vma mig;
+ struct page *dpage, *spage;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long pfn;
+ int ret = U_SUCCESS;
+
+ memset(&mig, 0, sizeof(mig));
+ mig.vma = vma;
+ mig.start = start;
+ mig.end = end;
+ mig.src = &src_pfn;
+ mig.dst = &dst_pfn;
+ mig.pgmap_owner = &kvmppc_uvmem_pgmap;
+ mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+
+ /* The requested page is already paged-out, nothing to do */
+ if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
+ return ret;
+
+ ret = migrate_vma_setup(&mig);
+ if (ret)
+ return -1;
+
+ spage = migrate_pfn_to_page(*mig.src);
+ if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
+ goto out_finalize;
+
+ if (!is_zone_device_page(spage))
+ goto out_finalize;
+
+ dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
+ if (!dpage) {
+ ret = -1;
+ goto out_finalize;
+ }
+
+ lock_page(dpage);
+ pvt = spage->zone_device_data;
+ pfn = page_to_pfn(dpage);
+
+ /*
+ * This function is used in two cases:
+ * - When HV touches a secure page, for which we do UV_PAGE_OUT
+ * - When a secure page is converted to shared page, we *get*
+ * the page to essentially unmap the device page. In this
+ * case we skip page-out.
+ */
+ if (!pvt->skip_page_out)
+ ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
+ gpa, 0, page_shift);
+
+ if (ret == U_SUCCESS)
+ *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
+ else {
+ unlock_page(dpage);
+ __free_page(dpage);
+ goto out_finalize;
+ }
+
+ migrate_vma_pages(&mig);
+
+out_finalize:
+ migrate_vma_finalize(&mig);
+ return ret;
+}
+
+static inline int kvmppc_svm_page_out(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long page_shift,
+ struct kvm *kvm, unsigned long gpa)
+{
+ int ret;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+
+ return ret;
+}
+
+/*
+ * Drop device pages that we maintain for the secure guest
+ *
+ * We first mark the pages to be skipped from UV_PAGE_OUT when there
+ * is HV side fault on these pages. Next we *get* these pages, forcing
+ * fault on them, do fault time migration to replace the device PTEs in
+ * QEMU page table with normal PTEs from newly allocated pages.
+ */
+void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,
+ struct kvm *kvm, bool skip_page_out)
+{
+ int i;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ struct page *uvmem_page;
+ struct vm_area_struct *vma = NULL;
+ unsigned long uvmem_pfn, gfn;
+ unsigned long addr;
+
+ mmap_read_lock(kvm->mm);
+
+ addr = slot->userspace_addr;
+
+ gfn = slot->base_gfn;
+ for (i = slot->npages; i; --i, ++gfn, addr += PAGE_SIZE) {
+
+ /* Fetch the VMA if addr is not in the latest fetched one */
+ if (!vma || addr >= vma->vm_end) {
+ vma = find_vma_intersection(kvm->mm, addr, addr+1);
+ if (!vma) {
+ pr_err("Can't find VMA for gfn:0x%lx\n", gfn);
+ break;
+ }
+ }
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+ uvmem_page = pfn_to_page(uvmem_pfn);
+ pvt = uvmem_page->zone_device_data;
+ pvt->skip_page_out = skip_page_out;
+ pvt->remove_gfn = true;
+
+ if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE,
+ PAGE_SHIFT, kvm, pvt->gpa))
+ pr_err("Can't page out gpa:0x%lx addr:0x%lx\n",
+ pvt->gpa, addr);
+ } else {
+ /* Remove the shared flag if any */
+ kvmppc_gfn_remove(gfn, kvm);
+ }
+
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ }
+
+ mmap_read_unlock(kvm->mm);
+}
+
+unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
+{
+ int srcu_idx;
+ struct kvm_memory_slot *memslot;
+
+ /*
+ * Expect to be called only after INIT_START and before INIT_DONE.
+ * If INIT_DONE was completed, use normal VM termination sequence.
+ */
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return H_STATE;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ kvm_for_each_memslot(memslot, kvm_memslots(kvm))
+ kvmppc_uvmem_drop_pages(memslot, kvm, false);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ kvm->arch.secure_guest = 0;
+ uv_svm_terminate(kvm->arch.lpid);
+
+ return H_PARAMETER;
+}
+
+/*
+ * Get a free device PFN from the pool
+ *
+ * Called when a normal page is moved to secure memory (UV_PAGE_IN). Device
+ * PFN will be used to keep track of the secure page on HV side.
+ *
+ * Called with kvm->arch.uvmem_lock held
+ */
+static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
+{
+ struct page *dpage = NULL;
+ unsigned long bit, uvmem_pfn;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long pfn_last, pfn_first;
+
+ pfn_first = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT;
+ pfn_last = pfn_first +
+ (range_len(&kvmppc_uvmem_pgmap.range) >> PAGE_SHIFT);
+
+ spin_lock(&kvmppc_uvmem_bitmap_lock);
+ bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
+ pfn_last - pfn_first);
+ if (bit >= (pfn_last - pfn_first))
+ goto out;
+ bitmap_set(kvmppc_uvmem_bitmap, bit, 1);
+ spin_unlock(&kvmppc_uvmem_bitmap_lock);
+
+ pvt = kzalloc(sizeof(*pvt), GFP_KERNEL);
+ if (!pvt)
+ goto out_clear;
+
+ uvmem_pfn = bit + pfn_first;
+ kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
+
+ pvt->gpa = gpa;
+ pvt->kvm = kvm;
+
+ dpage = pfn_to_page(uvmem_pfn);
+ dpage->zone_device_data = pvt;
+ get_page(dpage);
+ lock_page(dpage);
+ return dpage;
+out_clear:
+ spin_lock(&kvmppc_uvmem_bitmap_lock);
+ bitmap_clear(kvmppc_uvmem_bitmap, bit, 1);
+out:
+ spin_unlock(&kvmppc_uvmem_bitmap_lock);
+ return NULL;
+}
+
+/*
+ * Alloc a PFN from private device memory pool. If @pagein is true,
+ * copy page from normal memory to secure memory using UV_PAGE_IN uvcall.
+ */
+static int kvmppc_svm_page_in(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end, unsigned long gpa, struct kvm *kvm,
+ unsigned long page_shift,
+ bool pagein)
+{
+ unsigned long src_pfn, dst_pfn = 0;
+ struct migrate_vma mig;
+ struct page *spage;
+ unsigned long pfn;
+ struct page *dpage;
+ int ret = 0;
+
+ memset(&mig, 0, sizeof(mig));
+ mig.vma = vma;
+ mig.start = start;
+ mig.end = end;
+ mig.src = &src_pfn;
+ mig.dst = &dst_pfn;
+ mig.flags = MIGRATE_VMA_SELECT_SYSTEM;
+
+ ret = migrate_vma_setup(&mig);
+ if (ret)
+ return ret;
+
+ if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
+ ret = -1;
+ goto out_finalize;
+ }
+
+ dpage = kvmppc_uvmem_get_page(gpa, kvm);
+ if (!dpage) {
+ ret = -1;
+ goto out_finalize;
+ }
+
+ if (pagein) {
+ pfn = *mig.src >> MIGRATE_PFN_SHIFT;
+ spage = migrate_pfn_to_page(*mig.src);
+ if (spage) {
+ ret = uv_page_in(kvm->arch.lpid, pfn << page_shift,
+ gpa, 0, page_shift);
+ if (ret)
+ goto out_finalize;
+ }
+ }
+
+ *mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+ migrate_vma_pages(&mig);
+out_finalize:
+ migrate_vma_finalize(&mig);
+ return ret;
+}
+
+static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ unsigned long gfn = memslot->base_gfn;
+ struct vm_area_struct *vma;
+ unsigned long start, end;
+ int ret = 0;
+
+ mmap_read_lock(kvm->mm);
+ mutex_lock(&kvm->arch.uvmem_lock);
+ while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) {
+ ret = H_STATE;
+ start = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(start))
+ break;
+
+ end = start + (1UL << PAGE_SHIFT);
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma || vma->vm_start > start || vma->vm_end < end)
+ break;
+
+ ret = kvmppc_svm_page_in(vma, start, end,
+ (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false);
+ if (ret) {
+ ret = H_STATE;
+ break;
+ }
+
+ /* relinquish the cpu if needed */
+ cond_resched();
+ }
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ mmap_read_unlock(kvm->mm);
+ return ret;
+}
+
+unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int srcu_idx;
+ long ret = H_SUCCESS;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ /* migrate any unmoved normal pfn to device pfns*/
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots) {
+ ret = kvmppc_uv_migrate_mem_slot(kvm, memslot);
+ if (ret) {
+ /*
+ * The pages will remain transitioned.
+ * Its the callers responsibility to
+ * terminate the VM, which will undo
+ * all state of the VM. Till then
+ * this VM is in a erroneous state.
+ * Its KVMPPC_SECURE_INIT_DONE will
+ * remain unset.
+ */
+ ret = H_STATE;
+ goto out;
+ }
+ }
+
+ kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
+ pr_info("LPID %d went secure\n", kvm->arch.lpid);
+
+out:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+/*
+ * Shares the page with HV, thus making it a normal page.
+ *
+ * - If the page is already secure, then provision a new page and share
+ * - If the page is a normal page, share the existing page
+ *
+ * In the former case, uses dev_pagemap_ops.migrate_to_ram handler
+ * to unmap the device page from QEMU's page tables.
+ */
+static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
+ unsigned long page_shift)
+{
+
+ int ret = H_PARAMETER;
+ struct page *uvmem_page;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long pfn;
+ unsigned long gfn = gpa >> page_shift;
+ int srcu_idx;
+ unsigned long uvmem_pfn;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ mutex_lock(&kvm->arch.uvmem_lock);
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+ uvmem_page = pfn_to_page(uvmem_pfn);
+ pvt = uvmem_page->zone_device_data;
+ pvt->skip_page_out = true;
+ /*
+ * do not drop the GFN. It is a valid GFN
+ * that is transitioned to a shared GFN.
+ */
+ pvt->remove_gfn = false;
+ }
+
+retry:
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ pfn = gfn_to_pfn(kvm, gfn);
+ if (is_error_noslot_pfn(pfn))
+ goto out;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+ uvmem_page = pfn_to_page(uvmem_pfn);
+ pvt = uvmem_page->zone_device_data;
+ pvt->skip_page_out = true;
+ pvt->remove_gfn = false; /* it continues to be a valid GFN */
+ kvm_release_pfn_clean(pfn);
+ goto retry;
+ }
+
+ if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
+ page_shift)) {
+ kvmppc_gfn_shared(gfn, kvm);
+ ret = H_SUCCESS;
+ }
+ kvm_release_pfn_clean(pfn);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+out:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+/*
+ * H_SVM_PAGE_IN: Move page from normal memory to secure memory.
+ *
+ * H_PAGE_IN_SHARED flag makes the page shared which means that the same
+ * memory in is visible from both UV and HV.
+ */
+unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
+ unsigned long flags,
+ unsigned long page_shift)
+{
+ unsigned long start, end;
+ struct vm_area_struct *vma;
+ int srcu_idx;
+ unsigned long gfn = gpa >> page_shift;
+ int ret;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ if (page_shift != PAGE_SHIFT)
+ return H_P3;
+
+ if (flags & ~H_PAGE_IN_SHARED)
+ return H_P2;
+
+ if (flags & H_PAGE_IN_SHARED)
+ return kvmppc_share_page(kvm, gpa, page_shift);
+
+ ret = H_PARAMETER;
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ mmap_read_lock(kvm->mm);
+
+ start = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(start))
+ goto out;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ /* Fail the page-in request of an already paged-in page */
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
+ goto out_unlock;
+
+ end = start + (1UL << page_shift);
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma || vma->vm_start > start || vma->vm_end < end)
+ goto out_unlock;
+
+ if (kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
+ true))
+ goto out_unlock;
+
+ ret = H_SUCCESS;
+
+out_unlock:
+ mutex_unlock(&kvm->arch.uvmem_lock);
+out:
+ mmap_read_unlock(kvm->mm);
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+
+/*
+ * Fault handler callback that gets called when HV touches any page that
+ * has been moved to secure memory, we ask UV to give back the page by
+ * issuing UV_PAGE_OUT uvcall.
+ *
+ * This eventually results in dropping of device PFN and the newly
+ * provisioned page/PFN gets populated in QEMU page tables.
+ */
+static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
+{
+ struct kvmppc_uvmem_page_pvt *pvt = vmf->page->zone_device_data;
+
+ if (kvmppc_svm_page_out(vmf->vma, vmf->address,
+ vmf->address + PAGE_SIZE, PAGE_SHIFT,
+ pvt->kvm, pvt->gpa))
+ return VM_FAULT_SIGBUS;
+ else
+ return 0;
+}
+
+/*
+ * Release the device PFN back to the pool
+ *
+ * Gets called when secure GFN tranistions from a secure-PFN
+ * to a normal PFN during H_SVM_PAGE_OUT.
+ * Gets called with kvm->arch.uvmem_lock held.
+ */
+static void kvmppc_uvmem_page_free(struct page *page)
+{
+ unsigned long pfn = page_to_pfn(page) -
+ (kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT);
+ struct kvmppc_uvmem_page_pvt *pvt;
+
+ spin_lock(&kvmppc_uvmem_bitmap_lock);
+ bitmap_clear(kvmppc_uvmem_bitmap, pfn, 1);
+ spin_unlock(&kvmppc_uvmem_bitmap_lock);
+
+ pvt = page->zone_device_data;
+ page->zone_device_data = NULL;
+ if (pvt->remove_gfn)
+ kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+ else
+ kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+ kfree(pvt);
+}
+
+static const struct dev_pagemap_ops kvmppc_uvmem_ops = {
+ .page_free = kvmppc_uvmem_page_free,
+ .migrate_to_ram = kvmppc_uvmem_migrate_to_ram,
+};
+
+/*
+ * H_SVM_PAGE_OUT: Move page from secure memory to normal memory.
+ */
+unsigned long
+kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
+ unsigned long flags, unsigned long page_shift)
+{
+ unsigned long gfn = gpa >> page_shift;
+ unsigned long start, end;
+ struct vm_area_struct *vma;
+ int srcu_idx;
+ int ret;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ if (page_shift != PAGE_SHIFT)
+ return H_P3;
+
+ if (flags)
+ return H_P2;
+
+ ret = H_PARAMETER;
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ mmap_read_lock(kvm->mm);
+ start = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(start))
+ goto out;
+
+ end = start + (1UL << page_shift);
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma || vma->vm_start > start || vma->vm_end < end)
+ goto out;
+
+ if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa))
+ ret = H_SUCCESS;
+out:
+ mmap_read_unlock(kvm->mm);
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
+{
+ unsigned long pfn;
+ int ret = U_SUCCESS;
+
+ pfn = gfn_to_pfn(kvm, gfn);
+ if (is_error_noslot_pfn(pfn))
+ return -EFAULT;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
+ goto out;
+
+ ret = uv_page_in(kvm->arch.lpid, pfn << PAGE_SHIFT, gfn << PAGE_SHIFT,
+ 0, PAGE_SHIFT);
+out:
+ kvm_release_pfn_clean(pfn);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT;
+}
+
+int kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *new)
+{
+ int ret = __kvmppc_uvmem_memslot_create(kvm, new);
+
+ if (!ret)
+ ret = kvmppc_uv_migrate_mem_slot(kvm, new);
+
+ return ret;
+}
+
+void kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *old)
+{
+ __kvmppc_uvmem_memslot_delete(kvm, old);
+}
+
+static u64 kvmppc_get_secmem_size(void)
+{
+ struct device_node *np;
+ int i, len;
+ const __be32 *prop;
+ u64 size = 0;
+
+ /*
+ * First try the new ibm,secure-memory nodes which supersede the
+ * secure-memory-ranges property.
+ * If we found some, no need to read the deprecated ones.
+ */
+ for_each_compatible_node(np, NULL, "ibm,secure-memory") {
+ prop = of_get_property(np, "reg", &len);
+ if (!prop)
+ continue;
+ size += of_read_number(prop + 2, 2);
+ }
+ if (size)
+ return size;
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
+ if (!np)
+ goto out;
+
+ prop = of_get_property(np, "secure-memory-ranges", &len);
+ if (!prop)
+ goto out_put;
+
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++)
+ size += of_read_number(prop + (i * 4) + 2, 2);
+
+out_put:
+ of_node_put(np);
+out:
+ return size;
+}
+
+int kvmppc_uvmem_init(void)
+{
+ int ret = 0;
+ unsigned long size;
+ struct resource *res;
+ void *addr;
+ unsigned long pfn_last, pfn_first;
+
+ size = kvmppc_get_secmem_size();
+ if (!size) {
+ /*
+ * Don't fail the initialization of kvm-hv module if
+ * the platform doesn't export ibm,uv-firmware node.
+ * Let normal guests run on such PEF-disabled platform.
+ */
+ pr_info("KVMPPC-UVMEM: No support for secure guests\n");
+ goto out;
+ }
+
+ res = request_free_mem_region(&iomem_resource, size, "kvmppc_uvmem");
+ if (IS_ERR(res)) {
+ ret = PTR_ERR(res);
+ goto out;
+ }
+
+ kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
+ kvmppc_uvmem_pgmap.range.start = res->start;
+ kvmppc_uvmem_pgmap.range.end = res->end;
+ kvmppc_uvmem_pgmap.nr_range = 1;
+ kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
+ /* just one global instance: */
+ kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap;
+ addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE);
+ if (IS_ERR(addr)) {
+ ret = PTR_ERR(addr);
+ goto out_free_region;
+ }
+
+ pfn_first = res->start >> PAGE_SHIFT;
+ pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
+ kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!kvmppc_uvmem_bitmap) {
+ ret = -ENOMEM;
+ goto out_unmap;
+ }
+
+ pr_info("KVMPPC-UVMEM: Secure Memory size 0x%lx\n", size);
+ return ret;
+out_unmap:
+ memunmap_pages(&kvmppc_uvmem_pgmap);
+out_free_region:
+ release_mem_region(res->start, size);
+out:
+ return ret;
+}
+
+void kvmppc_uvmem_free(void)
+{
+ if (!kvmppc_uvmem_bitmap)
+ return;
+
+ memunmap_pages(&kvmppc_uvmem_pgmap);
+ release_mem_region(kvmppc_uvmem_pgmap.range.start,
+ range_len(&kvmppc_uvmem_pgmap.range));
+ kfree(kvmppc_uvmem_bitmap);
+}
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index f7ad99d..25a3679 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -26,7 +26,7 @@
#define FUNC(name) name
#define GET_SHADOW_VCPU(reg) lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2)
-#endif /* CONFIG_PPC_BOOK3S_XX */
+#endif /* CONFIG_PPC_BOOK3S_64 */
#define VCPU_LOAD_NVGPRS(vcpu) \
PPC_LL r14, VCPU_GPR(R14)(vcpu); \
@@ -55,8 +55,7 @@
****************************************************************************/
/* Registers:
- * r3: kvm_run pointer
- * r4: vcpu pointer
+ * r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
@@ -68,8 +67,8 @@
/* Save host state to the stack */
PPC_STLU r1, -SWITCH_FRAME_SIZE(r1)
- /* Save r3 (kvm_run) and r4 (vcpu) */
- SAVE_2GPRS(3, r1)
+ /* Save r3 (vcpu) */
+ SAVE_GPR(3, r1)
/* Save non-volatile registers (r14 - r31) */
SAVE_NVGPRS(r1)
@@ -82,47 +81,46 @@
PPC_STL r0, _LINK(r1)
/* Load non-volatile guest state from the vcpu */
- VCPU_LOAD_NVGPRS(r4)
+ VCPU_LOAD_NVGPRS(r3)
kvm_start_lightweight:
/* Copy registers into shadow vcpu so we can access them in real mode */
- mr r3, r4
bl FUNC(kvmppc_copy_to_svcpu)
nop
- REST_GPR(4, r1)
+ REST_GPR(3, r1)
#ifdef CONFIG_PPC_BOOK3S_64
/* Get the dcbz32 flag */
- PPC_LL r3, VCPU_HFLAGS(r4)
- rldicl r3, r3, 0, 63 /* r3 &= 1 */
- stb r3, HSTATE_RESTORE_HID5(r13)
+ PPC_LL r0, VCPU_HFLAGS(r3)
+ rldicl r0, r0, 0, 63 /* r3 &= 1 */
+ stb r0, HSTATE_RESTORE_HID5(r13)
/* Load up guest SPRG3 value, since it's user readable */
- lwz r3, VCPU_SHAREDBE(r4)
- cmpwi r3, 0
- ld r5, VCPU_SHARED(r4)
+ lbz r4, VCPU_SHAREDBE(r3)
+ cmpwi r4, 0
+ ld r5, VCPU_SHARED(r3)
beq sprg3_little_endian
sprg3_big_endian:
#ifdef __BIG_ENDIAN__
- ld r3, VCPU_SHARED_SPRG3(r5)
+ ld r4, VCPU_SHARED_SPRG3(r5)
#else
addi r5, r5, VCPU_SHARED_SPRG3
- ldbrx r3, 0, r5
+ ldbrx r4, 0, r5
#endif
b after_sprg3_load
sprg3_little_endian:
#ifdef __LITTLE_ENDIAN__
- ld r3, VCPU_SHARED_SPRG3(r5)
+ ld r4, VCPU_SHARED_SPRG3(r5)
#else
addi r5, r5, VCPU_SHARED_SPRG3
- ldbrx r3, 0, r5
+ ldbrx r4, 0, r5
#endif
after_sprg3_load:
- mtspr SPRN_SPRG3, r3
+ mtspr SPRN_SPRG3, r4
#endif /* CONFIG_PPC_BOOK3S_64 */
- PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */
+ PPC_LL r4, VCPU_SHADOW_MSR(r3) /* get shadow_msr */
/* Jump to segment patching handler and into our guest */
bl FUNC(kvmppc_entry_trampoline)
@@ -146,7 +144,7 @@
*
*/
- PPC_LL r3, GPR4(r1) /* vcpu pointer */
+ PPC_LL r3, GPR3(r1) /* vcpu pointer */
/*
* kvmppc_copy_from_svcpu can clobber volatile registers, save
@@ -169,7 +167,7 @@
#endif /* CONFIG_PPC_BOOK3S_64 */
/* R7 = vcpu */
- PPC_LL r7, GPR4(r1)
+ PPC_LL r7, GPR3(r1)
PPC_STL r14, VCPU_GPR(R14)(r7)
PPC_STL r15, VCPU_GPR(R15)(r7)
@@ -190,11 +188,11 @@
PPC_STL r30, VCPU_GPR(R30)(r7)
PPC_STL r31, VCPU_GPR(R31)(r7)
- /* Pass the exit number as 3rd argument to kvmppc_handle_exit */
- lwz r5, VCPU_TRAP(r7)
+ /* Pass the exit number as 2nd argument to kvmppc_handle_exit */
+ lwz r4, VCPU_TRAP(r7)
- /* Restore r3 (kvm_run) and r4 (vcpu) */
- REST_2GPRS(3, r1)
+ /* Restore r3 (vcpu) */
+ REST_GPR(3, r1)
bl FUNC(kvmppc_handle_exit_pr)
/* If RESUME_GUEST, get back in the loop */
@@ -223,11 +221,11 @@
PPC_LL r4, _LINK(r1)
PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1)
- /* Load vcpu and cpu_run */
- REST_2GPRS(3, r1)
+ /* Load vcpu */
+ REST_GPR(3, r1)
/* Load non-volatile guest state from the vcpu */
- VCPU_LOAD_NVGPRS(r4)
+ VCPU_LOAD_NVGPRS(r3)
/* Jump back into the beginning of this function */
b kvm_start_lightweight
@@ -235,7 +233,7 @@
kvm_loop_lightweight:
/* We'll need the vcpu pointer */
- REST_GPR(4, r1)
+ REST_GPR(3, r1)
/* Jump back into the beginning of this function */
b kvm_start_lightweight
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index bf02827..a114367 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -169,7 +169,7 @@
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
}
-static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_fpr_load(struct kvm_vcpu *vcpu,
int rs, ulong addr, int ls_type)
{
int emulated = EMULATE_FAIL;
@@ -188,7 +188,7 @@
kvmppc_inject_pf(vcpu, addr, false);
goto done_load;
} else if (r == EMULATE_DO_MMIO) {
- emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
+ emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR | rs,
len, 1);
goto done_load;
}
@@ -213,7 +213,7 @@
return emulated;
}
-static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_fpr_store(struct kvm_vcpu *vcpu,
int rs, ulong addr, int ls_type)
{
int emulated = EMULATE_FAIL;
@@ -248,7 +248,7 @@
if (r < 0) {
kvmppc_inject_pf(vcpu, addr, true);
} else if (r == EMULATE_DO_MMIO) {
- emulated = kvmppc_handle_store(run, vcpu, val, len, 1);
+ emulated = kvmppc_handle_store(vcpu, val, len, 1);
} else {
emulated = EMULATE_DONE;
}
@@ -259,7 +259,7 @@
return emulated;
}
-static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_psq_load(struct kvm_vcpu *vcpu,
int rs, ulong addr, bool w, int i)
{
int emulated = EMULATE_FAIL;
@@ -279,12 +279,12 @@
kvmppc_inject_pf(vcpu, addr, false);
goto done_load;
} else if ((r == EMULATE_DO_MMIO) && w) {
- emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
+ emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR | rs,
4, 1);
vcpu->arch.qpr[rs] = tmp[1];
goto done_load;
} else if (r == EMULATE_DO_MMIO) {
- emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs,
+ emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FQPR | rs,
8, 1);
goto done_load;
}
@@ -302,7 +302,7 @@
return emulated;
}
-static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_psq_store(struct kvm_vcpu *vcpu,
int rs, ulong addr, bool w, int i)
{
int emulated = EMULATE_FAIL;
@@ -318,10 +318,10 @@
if (r < 0) {
kvmppc_inject_pf(vcpu, addr, true);
} else if ((r == EMULATE_DO_MMIO) && w) {
- emulated = kvmppc_handle_store(run, vcpu, tmp[0], 4, 1);
+ emulated = kvmppc_handle_store(vcpu, tmp[0], 4, 1);
} else if (r == EMULATE_DO_MMIO) {
u64 val = ((u64)tmp[0] << 32) | tmp[1];
- emulated = kvmppc_handle_store(run, vcpu, val, 8, 1);
+ emulated = kvmppc_handle_store(vcpu, val, 8, 1);
} else {
emulated = EMULATE_DONE;
}
@@ -618,7 +618,7 @@
return EMULATE_DONE;
}
-int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
+int kvmppc_emulate_paired_single(struct kvm_vcpu *vcpu)
{
u32 inst;
enum emulation_result emulated = EMULATE_DONE;
@@ -680,7 +680,7 @@
int i = inst_get_field(inst, 17, 19);
addr += get_d_signext(inst);
- emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
break;
}
case OP_PSQ_LU:
@@ -690,7 +690,7 @@
int i = inst_get_field(inst, 17, 19);
addr += get_d_signext(inst);
- emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
if (emulated == EMULATE_DONE)
kvmppc_set_gpr(vcpu, ax_ra, addr);
@@ -703,7 +703,7 @@
int i = inst_get_field(inst, 17, 19);
addr += get_d_signext(inst);
- emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
break;
}
case OP_PSQ_STU:
@@ -713,7 +713,7 @@
int i = inst_get_field(inst, 17, 19);
addr += get_d_signext(inst);
- emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
if (emulated == EMULATE_DONE)
kvmppc_set_gpr(vcpu, ax_ra, addr);
@@ -733,7 +733,7 @@
int i = inst_get_field(inst, 22, 24);
addr += kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
break;
}
case OP_4X_PS_CMPO0:
@@ -747,7 +747,7 @@
int i = inst_get_field(inst, 22, 24);
addr += kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
if (emulated == EMULATE_DONE)
kvmppc_set_gpr(vcpu, ax_ra, addr);
@@ -824,7 +824,7 @@
int i = inst_get_field(inst, 22, 24);
addr += kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
break;
}
case OP_4XW_PSQ_STUX:
@@ -834,7 +834,7 @@
int i = inst_get_field(inst, 22, 24);
addr += kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
if (emulated == EMULATE_DONE)
kvmppc_set_gpr(vcpu, ax_ra, addr);
@@ -922,7 +922,7 @@
{
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
FPU_LS_SINGLE);
break;
}
@@ -930,7 +930,7 @@
{
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
FPU_LS_SINGLE);
if (emulated == EMULATE_DONE)
@@ -941,7 +941,7 @@
{
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
FPU_LS_DOUBLE);
break;
}
@@ -949,7 +949,7 @@
{
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
FPU_LS_DOUBLE);
if (emulated == EMULATE_DONE)
@@ -960,7 +960,7 @@
{
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
FPU_LS_SINGLE);
break;
}
@@ -968,7 +968,7 @@
{
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
FPU_LS_SINGLE);
if (emulated == EMULATE_DONE)
@@ -979,7 +979,7 @@
{
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
FPU_LS_DOUBLE);
break;
}
@@ -987,7 +987,7 @@
{
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
FPU_LS_DOUBLE);
if (emulated == EMULATE_DONE)
@@ -1001,7 +1001,7 @@
ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
addr += kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
addr, FPU_LS_SINGLE);
break;
}
@@ -1010,7 +1010,7 @@
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
addr, FPU_LS_SINGLE);
if (emulated == EMULATE_DONE)
@@ -1022,7 +1022,7 @@
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
addr, FPU_LS_DOUBLE);
break;
}
@@ -1031,7 +1031,7 @@
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
addr, FPU_LS_DOUBLE);
if (emulated == EMULATE_DONE)
@@ -1043,7 +1043,7 @@
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
addr, FPU_LS_SINGLE);
break;
}
@@ -1052,7 +1052,7 @@
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
addr, FPU_LS_SINGLE);
if (emulated == EMULATE_DONE)
@@ -1064,7 +1064,7 @@
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
addr, FPU_LS_DOUBLE);
break;
}
@@ -1073,7 +1073,7 @@
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
addr, FPU_LS_DOUBLE);
if (emulated == EMULATE_DONE)
@@ -1085,7 +1085,7 @@
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
addr,
FPU_LS_SINGLE_LOW);
break;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 3f6ad3f..b1fefa6 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -90,7 +90,43 @@
kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS);
}
-void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu);
+static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
+ ulong pc = kvmppc_get_pc(vcpu);
+ ulong lr = kvmppc_get_lr(vcpu);
+ if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+ kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
+ if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+ kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
+ vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
+ }
+}
+
+static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+ unsigned long msr, pc, new_msr, new_pc;
+
+ kvmppc_unfixup_split_real(vcpu);
+
+ msr = kvmppc_get_msr(vcpu);
+ pc = kvmppc_get_pc(vcpu);
+ new_msr = vcpu->arch.intr_msr;
+ new_pc = to_book3s(vcpu)->hior + vec;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* If transactional, change to suspend mode on IRQ delivery */
+ if (MSR_TM_TRANSACTIONAL(msr))
+ new_msr |= MSR_TS_S;
+ else
+ new_msr |= msr & MSR_TS_MASK;
+#endif
+
+ kvmppc_set_srr0(vcpu, pc);
+ kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
+ kvmppc_set_pc(vcpu, new_pc);
+ kvmppc_set_msr(vcpu, new_msr);
+}
static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
{
@@ -533,7 +569,7 @@
#endif
}
-void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
+static void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
{
u32 host_pvr;
@@ -664,7 +700,7 @@
return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT);
}
-int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_handle_pagefault(struct kvm_vcpu *vcpu,
ulong eaddr, int vec)
{
bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
@@ -704,7 +740,7 @@
(vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) &&
((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS))
pte.raddr &= ~SPLIT_HACK_MASK;
- /* fall through */
+ fallthrough;
case MSR_IR:
vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
@@ -759,7 +795,7 @@
/* The guest's PTE is not mapped yet. Map on the host */
if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) {
/* Exit KVM if mapping failed */
- run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return RESUME_HOST;
}
if (data)
@@ -772,7 +808,7 @@
vcpu->stat.mmio_exits++;
vcpu->arch.paddr_accessed = pte.raddr;
vcpu->arch.vaddr_accessed = pte.eaddr;
- r = kvmppc_emulate_mmio(run, vcpu);
+ r = kvmppc_emulate_mmio(vcpu);
if ( r == RESUME_HOST_NV )
r = RESUME_HOST;
}
@@ -956,7 +992,7 @@
enum emulation_result er = EMULATE_FAIL;
if (!(kvmppc_get_msr(vcpu) & MSR_PR))
- er = kvmppc_emulate_instruction(vcpu->run, vcpu);
+ er = kvmppc_emulate_instruction(vcpu);
if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) {
/* Couldn't emulate, trigger interrupt in guest */
@@ -1053,8 +1089,7 @@
}
}
-static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
+static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr)
{
enum emulation_result er;
ulong flags;
@@ -1088,7 +1123,7 @@
}
vcpu->stat.emulated_inst_exits++;
- er = kvmppc_emulate_instruction(run, vcpu);
+ er = kvmppc_emulate_instruction(vcpu);
switch (er) {
case EMULATE_DONE:
r = RESUME_GUEST_NV;
@@ -1103,7 +1138,7 @@
r = RESUME_GUEST;
break;
case EMULATE_DO_MMIO:
- run->exit_reason = KVM_EXIT_MMIO;
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
r = RESUME_HOST_NV;
break;
case EMULATE_EXIT_USER:
@@ -1116,9 +1151,9 @@
return r;
}
-int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
+int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr)
{
+ struct kvm_run *run = vcpu->run;
int r = RESUME_HOST;
int s;
@@ -1162,7 +1197,7 @@
/* only care about PTEG not found errors, but leave NX alone */
if (shadow_srr1 & 0x40000000) {
int idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
+ r = kvmppc_handle_pagefault(vcpu, kvmppc_get_pc(vcpu), exit_nr);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
vcpu->stat.sp_instruc++;
} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -1212,7 +1247,7 @@
*/
if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
int idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
+ r = kvmppc_handle_pagefault(vcpu, dar, exit_nr);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
} else {
kvmppc_core_queue_data_storage(vcpu, dar, fault_dsisr);
@@ -1256,7 +1291,7 @@
break;
case BOOK3S_INTERRUPT_PROGRAM:
case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
- r = kvmppc_exit_pr_progint(run, vcpu, exit_nr);
+ r = kvmppc_exit_pr_progint(vcpu, exit_nr);
break;
case BOOK3S_INTERRUPT_SYSCALL:
{
@@ -1334,7 +1369,7 @@
emul = kvmppc_get_last_inst(vcpu, INST_GENERIC,
&last_inst);
if (emul == EMULATE_DONE)
- r = kvmppc_exit_pr_progint(run, vcpu, exit_nr);
+ r = kvmppc_exit_pr_progint(vcpu, exit_nr);
else
r = RESUME_GUEST;
@@ -1708,21 +1743,17 @@
return r;
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu_book3s;
- struct kvm_vcpu *vcpu;
- int err = -ENOMEM;
unsigned long p;
+ int err;
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu)
- goto out;
+ err = -ENOMEM;
vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
if (!vcpu_book3s)
- goto free_vcpu;
+ goto out;
vcpu->arch.book3s = vcpu_book3s;
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
@@ -1732,14 +1763,9 @@
goto free_vcpu3s;
#endif
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_shadow_vcpu;
-
- err = -ENOMEM;
p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
if (!p)
- goto uninit_vcpu;
+ goto free_shadow_vcpu;
vcpu->arch.shared = (void *)p;
#ifdef CONFIG_PPC_BOOK3S_64
/* Always start the shared struct in native endian mode */
@@ -1761,57 +1787,50 @@
#else
/* default to book3s_32 (750) */
vcpu->arch.pvr = 0x84202;
+ vcpu->arch.intr_msr = 0;
#endif
kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
vcpu->arch.slb_nr = 64;
vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE;
- err = kvmppc_mmu_init(vcpu);
+ err = kvmppc_mmu_init_pr(vcpu);
if (err < 0)
goto free_shared_page;
- return vcpu;
+ return 0;
free_shared_page:
free_page((unsigned long)vcpu->arch.shared);
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
free_shadow_vcpu:
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kfree(vcpu->arch.shadow_vcpu);
free_vcpu3s:
#endif
vfree(vcpu_book3s);
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
- return ERR_PTR(err);
+ return err;
}
static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+ kvmppc_mmu_destroy_pr(vcpu);
free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
- kvm_vcpu_uninit(vcpu);
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kfree(vcpu->arch.shadow_vcpu);
#endif
vfree(vcpu_book3s);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
}
-static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
{
int ret;
-#ifdef CONFIG_ALTIVEC
- unsigned long uninitialized_var(vrsave);
-#endif
/* Check if we can run the vcpu at all */
if (!vcpu->arch.sane) {
- kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = -EINVAL;
goto out;
}
@@ -1838,7 +1857,7 @@
kvmppc_fix_ee_before_entry();
- ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+ ret = __kvmppc_vcpu_run(vcpu);
kvmppc_clear_debug(vcpu);
@@ -1862,7 +1881,6 @@
static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
struct kvm_dirty_log *log)
{
- struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
struct kvm_vcpu *vcpu;
ulong ga, ga_end;
@@ -1872,15 +1890,12 @@
mutex_lock(&kvm->slots_lock);
- r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
if (r)
goto out;
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
- slots = kvm_memslots(kvm);
- memslot = id_to_memslot(slots, log->slot);
-
ga = memslot->base_gfn << PAGE_SHIFT;
ga_end = ga + (memslot->npages << PAGE_SHIFT);
@@ -1905,7 +1920,8 @@
static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
{
return 0;
}
@@ -1919,19 +1935,11 @@
return;
}
-static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *slot)
{
return;
}
-static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return 0;
-}
-
-
#ifdef CONFIG_PPC64
static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
struct kvm_ppc_smmu_info *info)
@@ -1995,6 +2003,7 @@
{
/* We should not get called */
BUG();
+ return 0;
}
#endif /* CONFIG_PPC64 */
@@ -2060,6 +2069,7 @@
.set_one_reg = kvmppc_set_one_reg_pr,
.vcpu_load = kvmppc_core_vcpu_load_pr,
.vcpu_put = kvmppc_core_vcpu_put_pr,
+ .inject_interrupt = kvmppc_inject_interrupt_pr,
.set_msr = kvmppc_set_msr_pr,
.vcpu_run = kvmppc_vcpu_run_pr,
.vcpu_create = kvmppc_core_vcpu_create_pr,
@@ -2073,9 +2083,7 @@
.age_hva = kvm_age_hva_pr,
.test_age_hva = kvm_test_age_hva_pr,
.set_spte_hva = kvm_set_spte_hva_pr,
- .mmu_destroy = kvmppc_mmu_destroy_pr,
.free_memslot = kvmppc_core_free_memslot_pr,
- .create_memslot = kvmppc_core_create_memslot_pr,
.init_vm = kvmppc_core_init_vm_pr,
.destroy_vm = kvmppc_core_destroy_vm_pr,
.get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 41137ec..0f847f1 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -229,7 +229,9 @@
*/
args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (rc)
goto fail;
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 0169bab..1f492aa 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -167,16 +167,9 @@
* R12 = (guest CR << 32) | exit handler id
* R13 = PACA
* HSTATE.SCRATCH0 = guest R12
- * HSTATE.SCRATCH1 = guest CTR if RELOCATABLE
*/
#ifdef CONFIG_PPC64
/* Match 32-bit entry */
-#ifdef CONFIG_RELOCATABLE
- std r9, HSTATE_SCRATCH2(r13)
- ld r9, HSTATE_SCRATCH1(r13)
- mtctr r9
- ld r9, HSTATE_SCRATCH2(r13)
-#endif
rotldi r12, r12, 32 /* Flip R12 halves for stw */
stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */
srdi r12, r12, 32 /* shift trap into low half */
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 381bf8d..5fee5a1 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1334,47 +1334,97 @@
return -ENXIO;
}
-static void kvmppc_xics_free(struct kvm_device *dev)
+/*
+ * Called when device fd is closed. kvm->lock is held.
+ */
+static void kvmppc_xics_release(struct kvm_device *dev)
{
struct kvmppc_xics *xics = dev->private;
int i;
struct kvm *kvm = xics->kvm;
+ struct kvm_vcpu *vcpu;
+
+ pr_devel("Releasing xics device\n");
+
+ /*
+ * Since this is the device release function, we know that
+ * userspace does not have any open fd referring to the
+ * device. Therefore there can not be any of the device
+ * attribute set/get functions being executed concurrently,
+ * and similarly, the connect_vcpu and set/clr_mapped
+ * functions also cannot be being executed.
+ */
debugfs_remove(xics->dentry);
+ /*
+ * We should clean up the vCPU interrupt presenters first.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /*
+ * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+ * (i.e. kvmppc_xics_[gs]et_icp) can be done concurrently.
+ * Holding the vcpu->mutex also means that execution is
+ * excluded for the vcpu until the ICP was freed. When the vcpu
+ * can execute again, vcpu->arch.icp and vcpu->arch.irq_type
+ * have been cleared and the vcpu will not be going into the
+ * XICS code anymore.
+ */
+ mutex_lock(&vcpu->mutex);
+ kvmppc_xics_free_icp(vcpu);
+ mutex_unlock(&vcpu->mutex);
+ }
+
if (kvm)
kvm->arch.xics = NULL;
- for (i = 0; i <= xics->max_icsid; i++)
+ for (i = 0; i <= xics->max_icsid; i++) {
kfree(xics->ics[i]);
- kfree(xics);
+ xics->ics[i] = NULL;
+ }
+ /*
+ * A reference of the kvmppc_xics pointer is now kept under
+ * the xics_device pointer of the machine for reuse. It is
+ * freed when the VM is destroyed for now until we fix all the
+ * execution paths.
+ */
kfree(dev);
}
+static struct kvmppc_xics *kvmppc_xics_get_device(struct kvm *kvm)
+{
+ struct kvmppc_xics **kvm_xics_device = &kvm->arch.xics_device;
+ struct kvmppc_xics *xics = *kvm_xics_device;
+
+ if (!xics) {
+ xics = kzalloc(sizeof(*xics), GFP_KERNEL);
+ *kvm_xics_device = xics;
+ } else {
+ memset(xics, 0, sizeof(*xics));
+ }
+
+ return xics;
+}
+
static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
{
struct kvmppc_xics *xics;
struct kvm *kvm = dev->kvm;
- int ret = 0;
- xics = kzalloc(sizeof(*xics), GFP_KERNEL);
+ pr_devel("Creating xics for partition\n");
+
+ /* Already there ? */
+ if (kvm->arch.xics)
+ return -EEXIST;
+
+ xics = kvmppc_xics_get_device(kvm);
if (!xics)
return -ENOMEM;
dev->private = xics;
xics->dev = dev;
xics->kvm = kvm;
-
- /* Already there ? */
- if (kvm->arch.xics)
- ret = -EEXIST;
- else
- kvm->arch.xics = xics;
-
- if (ret) {
- kfree(xics);
- return ret;
- }
+ kvm->arch.xics = xics;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (cpu_has_feature(CPU_FTR_ARCH_206) &&
@@ -1399,7 +1449,7 @@
.name = "kvm-xics",
.create = kvmppc_xics_create,
.init = kvmppc_xics_init,
- .destroy = kvmppc_xics_free,
+ .release = kvmppc_xics_release,
.set_attr = xics_set_attr,
.get_attr = xics_get_attr,
.has_attr = xics_has_attr,
@@ -1415,7 +1465,7 @@
return -EPERM;
if (xics->kvm != vcpu->kvm)
return -EPERM;
- if (vcpu->arch.irq_type)
+ if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
r = kvmppc_xics_create_icp(vcpu, xcpu);
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index baa7408..a0ebc29 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -484,7 +484,7 @@
kvmppc_xive_select_irq(state, &hw_num, &xd);
/*
- * See command in xive_lock_and_mask() concerning masking
+ * See comment in xive_lock_and_mask() concerning masking
* via firmware.
*/
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
@@ -1211,6 +1211,42 @@
vcpu->arch.xive_vcpu = NULL;
}
+static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
+{
+ /* We have a block of xive->nr_servers VPs. We just need to check
+ * packed vCPU ids are below that.
+ */
+ return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers;
+}
+
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
+{
+ u32 vp_id;
+
+ if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) {
+ pr_devel("Out of bounds !\n");
+ return -EINVAL;
+ }
+
+ if (xive->vp_base == XIVE_INVALID_VP) {
+ xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers);
+ pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers);
+
+ if (xive->vp_base == XIVE_INVALID_VP)
+ return -ENOSPC;
+ }
+
+ vp_id = kvmppc_xive_vp(xive, cpu);
+ if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
+ pr_devel("Duplicate !\n");
+ return -EEXIST;
+ }
+
+ *vp = vp_id;
+
+ return 0;
+}
+
int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu)
{
@@ -1229,20 +1265,13 @@
return -EPERM;
if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
- if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
- pr_devel("Out of bounds !\n");
- return -EINVAL;
- }
/* We need to synchronize with queue provisioning */
mutex_lock(&xive->lock);
- vp_id = kvmppc_xive_vp(xive, cpu);
- if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
- pr_devel("Duplicate !\n");
- r = -EEXIST;
+ r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
+ if (r)
goto bail;
- }
xc = kzalloc(sizeof(*xc), GFP_KERNEL);
if (!xc) {
@@ -1834,6 +1863,43 @@
return 0;
}
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
+{
+ u32 __user *ubufp = (u32 __user *) addr;
+ u32 nr_servers;
+ int rc = 0;
+
+ if (get_user(nr_servers, ubufp))
+ return -EFAULT;
+
+ pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
+
+ if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID)
+ return -EINVAL;
+
+ mutex_lock(&xive->lock);
+ if (xive->vp_base != XIVE_INVALID_VP)
+ /* The VP block is allocated once and freed when the device
+ * is released. Better not allow to change its size since its
+ * used by connect_vcpu to validate vCPU ids are valid (eg,
+ * setting it back to a higher value could allow connect_vcpu
+ * to come up with a VP id that goes beyond the VP block, which
+ * is likely to cause a crash in OPAL).
+ */
+ rc = -EBUSY;
+ else if (nr_servers > KVM_MAX_VCPUS)
+ /* We don't need more servers. Higher vCPU ids get packed
+ * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id().
+ */
+ xive->nr_servers = KVM_MAX_VCPUS;
+ else
+ xive->nr_servers = nr_servers;
+
+ mutex_unlock(&xive->lock);
+
+ return rc;
+}
+
static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
struct kvmppc_xive *xive = dev->private;
@@ -1842,6 +1908,11 @@
switch (attr->group) {
case KVM_DEV_XICS_GRP_SOURCES:
return xive_set_source(xive, attr->attr, attr->addr);
+ case KVM_DEV_XICS_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XICS_NR_SERVERS:
+ return kvmppc_xive_set_nr_servers(xive, attr->addr);
+ }
}
return -ENXIO;
}
@@ -1867,6 +1938,11 @@
attr->attr < KVMPPC_XICS_NR_IRQS)
return 0;
break;
+ case KVM_DEV_XICS_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XICS_NR_SERVERS:
+ return 0;
+ }
}
return -ENXIO;
}
@@ -2001,7 +2077,6 @@
{
struct kvmppc_xive *xive;
struct kvm *kvm = dev->kvm;
- int ret = 0;
pr_devel("Creating xive for partition\n");
@@ -2025,18 +2100,15 @@
else
xive->q_page_order = xive->q_order - PAGE_SHIFT;
- /* Allocate a bunch of VPs */
- xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
- pr_devel("VP_Base=%x\n", xive->vp_base);
-
- if (xive->vp_base == XIVE_INVALID_VP)
- ret = -ENOMEM;
+ /* VP allocation is delayed to the first call to connect_vcpu */
+ xive->vp_base = XIVE_INVALID_VP;
+ /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets
+ * on a POWER9 system.
+ */
+ xive->nr_servers = KVM_MAX_VCPUS;
xive->single_escalation = xive_native_has_single_escalation();
- if (ret)
- return ret;
-
kvm->arch.xive = xive;
return 0;
}
@@ -2107,9 +2179,9 @@
if (!xc)
continue;
- seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x"
+ seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x"
" MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
- xc->server_num, xc->cppr, xc->hw_cppr,
+ xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr,
xc->mfrr, xc->pending,
xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index fe3ed50..382e3a5 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -135,6 +135,9 @@
/* Flags */
u8 single_escalation;
+ /* Number of entries in the VP block */
+ u32 nr_servers;
+
struct kvmppc_xive_ops *ops;
struct address_space *mapping;
struct mutex mapping_lock;
@@ -296,6 +299,8 @@
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
struct kvmppc_xive_vcpu *xc, int irq);
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp);
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr);
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index d78d848..a59a94f 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -31,6 +31,12 @@
{
u64 val;
+ /*
+ * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
+ * load operation, so there is no need to enforce load-after-store
+ * ordering.
+ */
+
if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
offset |= offset << 4;
@@ -136,19 +142,12 @@
return -EPERM;
if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
- if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
- pr_devel("Out of bounds !\n");
- return -EINVAL;
- }
mutex_lock(&xive->lock);
- vp_id = kvmppc_xive_vp(xive, server_num);
- if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
- pr_devel("Duplicate !\n");
- rc = -EEXIST;
+ rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
+ if (rc)
goto bail;
- }
xc = kzalloc(sizeof(*xc), GFP_KERNEL);
if (!xc) {
@@ -949,6 +948,8 @@
return kvmppc_xive_reset(xive);
case KVM_DEV_XIVE_EQ_SYNC:
return kvmppc_xive_native_eq_sync(xive);
+ case KVM_DEV_XIVE_NR_SERVERS:
+ return kvmppc_xive_set_nr_servers(xive, attr->addr);
}
break;
case KVM_DEV_XIVE_GRP_SOURCE:
@@ -988,6 +989,7 @@
switch (attr->attr) {
case KVM_DEV_XIVE_RESET:
case KVM_DEV_XIVE_EQ_SYNC:
+ case KVM_DEV_XIVE_NR_SERVERS:
return 0;
}
break;
@@ -1088,7 +1090,6 @@
{
struct kvmppc_xive *xive;
struct kvm *kvm = dev->kvm;
- int ret = 0;
pr_devel("Creating xive native device\n");
@@ -1105,23 +1106,16 @@
mutex_init(&xive->mapping_lock);
mutex_init(&xive->lock);
- /*
- * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
- * a default. Getting the max number of CPUs the VM was
- * configured with would improve our usage of the XIVE VP space.
+ /* VP allocation is delayed to the first call to connect_vcpu */
+ xive->vp_base = XIVE_INVALID_VP;
+ /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets
+ * on a POWER9 system.
*/
- xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
- pr_devel("VP_Base=%x\n", xive->vp_base);
-
- if (xive->vp_base == XIVE_INVALID_VP)
- ret = -ENXIO;
+ xive->nr_servers = KVM_MAX_VCPUS;
xive->single_escalation = xive_native_has_single_escalation();
xive->ops = &kvmppc_xive_native_ops;
- if (ret)
- return ret;
-
kvm->arch.xive = xive;
return 0;
}
@@ -1225,8 +1219,8 @@
if (!xc)
continue;
- seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
- xc->server_num,
+ seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
+ xc->server_num, xc->vp_id,
vcpu->arch.xive_saved_state.nsr,
vcpu->arch.xive_saved_state.cppr,
vcpu->arch.xive_saved_state.ipb,
@@ -1240,17 +1234,7 @@
return 0;
}
-static int xive_native_debug_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xive_native_debug_show, inode->i_private);
-}
-
-static const struct file_operations xive_native_debug_fops = {
- .open = xive_native_debug_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(xive_native_debug);
static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index a8a900a..4ad3c02 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -58,6 +58,9 @@
{
u64 val;
+ if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+ offset |= XIVE_ESB_LD_ST_MO;
+
if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
offset |= offset << 4;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index be9a458..b1abcb8 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -35,29 +35,28 @@
unsigned long kvmppc_booke_handlers;
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
-
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "mmio", VCPU_STAT(mmio_exits) },
- { "sig", VCPU_STAT(signal_exits) },
- { "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
- { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
- { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
- { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) },
- { "sysc", VCPU_STAT(syscall_exits) },
- { "isi", VCPU_STAT(isi_exits) },
- { "dsi", VCPU_STAT(dsi_exits) },
- { "inst_emu", VCPU_STAT(emulated_inst_exits) },
- { "dec", VCPU_STAT(dec_exits) },
- { "ext_intr", VCPU_STAT(ext_intr_exits) },
- { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
- { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
- { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
- { "doorbell", VCPU_STAT(dbell_exits) },
- { "guest doorbell", VCPU_STAT(gdbell_exits) },
- { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
+ VCPU_STAT("mmio", mmio_exits),
+ VCPU_STAT("sig", signal_exits),
+ VCPU_STAT("itlb_r", itlb_real_miss_exits),
+ VCPU_STAT("itlb_v", itlb_virt_miss_exits),
+ VCPU_STAT("dtlb_r", dtlb_real_miss_exits),
+ VCPU_STAT("dtlb_v", dtlb_virt_miss_exits),
+ VCPU_STAT("sysc", syscall_exits),
+ VCPU_STAT("isi", isi_exits),
+ VCPU_STAT("dsi", dsi_exits),
+ VCPU_STAT("inst_emu", emulated_inst_exits),
+ VCPU_STAT("dec", dec_exits),
+ VCPU_STAT("ext_intr", ext_intr_exits),
+ VCPU_STAT("halt_successful_poll", halt_successful_poll),
+ VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+ VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+ VCPU_STAT("halt_wakeup", halt_wakeup),
+ VCPU_STAT("doorbell", dbell_exits),
+ VCPU_STAT("guest doorbell", gdbell_exits),
+ VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ VM_STAT("remote_tlb_flush", remote_tlb_flush),
{ NULL }
};
@@ -421,11 +420,11 @@
case BOOKE_IRQPRIO_DATA_STORAGE:
case BOOKE_IRQPRIO_ALIGNMENT:
update_dear = true;
- /* fall through */
+ fallthrough;
case BOOKE_IRQPRIO_INST_STORAGE:
case BOOKE_IRQPRIO_PROGRAM:
update_esr = true;
- /* fall through */
+ fallthrough;
case BOOKE_IRQPRIO_ITLB_MISS:
case BOOKE_IRQPRIO_SYSCALL:
case BOOKE_IRQPRIO_FP_UNAVAIL:
@@ -459,7 +458,7 @@
case BOOKE_IRQPRIO_DECREMENTER:
case BOOKE_IRQPRIO_FIT:
keep_irq = true;
- /* fall through */
+ fallthrough;
case BOOKE_IRQPRIO_EXTERNAL:
case BOOKE_IRQPRIO_DBELL:
allowed = vcpu->arch.shared->msr & MSR_EE;
@@ -730,13 +729,13 @@
return r;
}
-int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)
{
int ret, s;
struct debug_reg debug;
if (!vcpu->arch.sane) {
- kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
@@ -775,10 +774,10 @@
debug = current->thread.debug;
current->thread.debug = vcpu->arch.dbg_reg;
- vcpu->arch.pgdir = current->mm->pgd;
+ vcpu->arch.pgdir = vcpu->kvm->mm->pgd;
kvmppc_fix_ee_before_entry();
- ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+ ret = __kvmppc_vcpu_run(vcpu);
/* No need for guest_exit. It's done in handle_exit.
We also get here with interrupts enabled. */
@@ -800,11 +799,11 @@
return ret;
}
-static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int emulation_exit(struct kvm_vcpu *vcpu)
{
enum emulation_result er;
- er = kvmppc_emulate_instruction(run, vcpu);
+ er = kvmppc_emulate_instruction(vcpu);
switch (er) {
case EMULATE_DONE:
/* don't overwrite subtypes, just account kvm_stats */
@@ -821,8 +820,8 @@
__func__, vcpu->arch.regs.nip, vcpu->arch.last_inst);
/* For debugging, encode the failing instruction and
* report it to userspace. */
- run->hw.hardware_exit_reason = ~0ULL << 32;
- run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
+ vcpu->run->hw.hardware_exit_reason = ~0ULL << 32;
+ vcpu->run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
kvmppc_core_queue_program(vcpu, ESR_PIL);
return RESUME_HOST;
@@ -834,8 +833,9 @@
}
}
-static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int kvmppc_handle_debug(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *run = vcpu->run;
struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg);
u32 dbsr = vcpu->arch.dbsr;
@@ -954,7 +954,7 @@
}
}
-static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_resume_inst_load(struct kvm_vcpu *vcpu,
enum emulation_result emulated, u32 last_inst)
{
switch (emulated) {
@@ -966,8 +966,8 @@
__func__, vcpu->arch.regs.nip);
/* For debugging, encode the failing instruction and
* report it to userspace. */
- run->hw.hardware_exit_reason = ~0ULL << 32;
- run->hw.hardware_exit_reason |= last_inst;
+ vcpu->run->hw.hardware_exit_reason = ~0ULL << 32;
+ vcpu->run->hw.hardware_exit_reason |= last_inst;
kvmppc_core_queue_program(vcpu, ESR_PIL);
return RESUME_HOST;
@@ -981,9 +981,9 @@
*
* Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
*/
-int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
+int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr)
{
+ struct kvm_run *run = vcpu->run;
int r = RESUME_HOST;
int s;
int idx;
@@ -1024,7 +1024,7 @@
run->ready_for_interrupt_injection = 1;
if (emulated != EMULATE_DONE) {
- r = kvmppc_resume_inst_load(run, vcpu, emulated, last_inst);
+ r = kvmppc_resume_inst_load(vcpu, emulated, last_inst);
goto out;
}
@@ -1084,7 +1084,7 @@
break;
case BOOKE_INTERRUPT_HV_PRIV:
- r = emulation_exit(run, vcpu);
+ r = emulation_exit(vcpu);
break;
case BOOKE_INTERRUPT_PROGRAM:
@@ -1094,7 +1094,7 @@
* We are here because of an SW breakpoint instr,
* so lets return to host to handle.
*/
- r = kvmppc_handle_debug(run, vcpu);
+ r = kvmppc_handle_debug(vcpu);
run->exit_reason = KVM_EXIT_DEBUG;
kvmppc_account_exit(vcpu, DEBUG_EXITS);
break;
@@ -1115,7 +1115,7 @@
break;
}
- r = emulation_exit(run, vcpu);
+ r = emulation_exit(vcpu);
break;
case BOOKE_INTERRUPT_FP_UNAVAIL:
@@ -1282,7 +1282,7 @@
* actually RAM. */
vcpu->arch.paddr_accessed = gpaddr;
vcpu->arch.vaddr_accessed = eaddr;
- r = kvmppc_emulate_mmio(run, vcpu);
+ r = kvmppc_emulate_mmio(vcpu);
kvmppc_account_exit(vcpu, MMIO_EXITS);
}
@@ -1333,7 +1333,7 @@
}
case BOOKE_INTERRUPT_DEBUG: {
- r = kvmppc_handle_debug(run, vcpu);
+ r = kvmppc_handle_debug(vcpu);
if (r == RESUME_HOST)
run->exit_reason = KVM_EXIT_DEBUG;
kvmppc_account_exit(vcpu, DEBUG_EXITS);
@@ -1377,36 +1377,6 @@
update_timer_ints(vcpu);
}
-/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- int i;
- int r;
-
- vcpu->arch.regs.nip = 0;
- vcpu->arch.shared->pir = vcpu->vcpu_id;
- kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
- kvmppc_set_msr(vcpu, 0);
-
-#ifndef CONFIG_KVM_BOOKE_HV
- vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
- vcpu->arch.shadow_pid = 1;
- vcpu->arch.shared->msr = 0;
-#endif
-
- /* Eye-catching numbers so we know if the guest takes an interrupt
- * before it's programmed its own IVPR/IVORs. */
- vcpu->arch.ivpr = 0x55550000;
- for (i = 0; i < BOOKE_IRQPRIO_MAX; i++)
- vcpu->arch.ivor[i] = 0x7700 | i * 4;
-
- kvmppc_init_timing_stats(vcpu);
-
- r = kvmppc_core_vcpu_setup(vcpu);
- kvmppc_sanity_check(vcpu);
- return r;
-}
-
int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
{
/* setup watchdog timer once */
@@ -1777,12 +1747,12 @@
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
@@ -1796,25 +1766,24 @@
return r;
}
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+
+}
+
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
-void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
}
-int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return 0;
-}
-
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
{
return 0;
}
@@ -2104,19 +2073,45 @@
kvmppc_clear_dbsr();
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
-{
- vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
-}
-
int kvmppc_core_init_vm(struct kvm *kvm)
{
return kvm->arch.kvm_ops->init_vm(kvm);
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu)
{
- return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+ int i;
+ int r;
+
+ r = vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu);
+ if (r)
+ return r;
+
+ /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
+ vcpu->arch.regs.nip = 0;
+ vcpu->arch.shared->pir = vcpu->vcpu_id;
+ kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
+ kvmppc_set_msr(vcpu, 0);
+
+#ifndef CONFIG_KVM_BOOKE_HV
+ vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
+ vcpu->arch.shadow_pid = 1;
+ vcpu->arch.shared->msr = 0;
+#endif
+
+ /* Eye-catching numbers so we know if the guest takes an interrupt
+ * before it's programmed its own IVPR/IVORs. */
+ vcpu->arch.ivpr = 0x55550000;
+ for (i = 0; i < BOOKE_IRQPRIO_MAX; i++)
+ vcpu->arch.ivor[i] = 0x7700 | i * 4;
+
+ kvmppc_init_timing_stats(vcpu);
+
+ r = kvmppc_core_vcpu_setup(vcpu);
+ if (r)
+ vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+ kvmppc_sanity_check(vcpu);
+ return r;
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 9d3169f..be9da96 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -70,7 +70,7 @@
void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
-int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_booke_emulate_op(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
@@ -94,18 +94,12 @@
void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
-extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
-extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
- struct kvm_vcpu *vcpu,
+extern int kvmppc_core_emulate_op_e500(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
ulong spr_val);
extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
ulong *spr_val);
-extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
-extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
- struct kvm_vcpu *vcpu,
- unsigned int inst, int *advance);
extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
ulong spr_val);
extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 689ff5f..d8d38ac 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -39,7 +39,7 @@
kvmppc_set_msr(vcpu, vcpu->arch.csrr1);
}
-int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_booke_emulate_op(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 2e56ab5..6fa82ef 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -237,7 +237,7 @@
/* Switch to kernel stack and jump to handler. */
LOAD_REG_ADDR(r3, kvmppc_handle_exit)
mtctr r3
- lwz r3, HOST_RUN(r1)
+ mr r3, r4
lwz r2, HOST_R2(r1)
mr r14, r4 /* Save vcpu pointer. */
@@ -337,15 +337,14 @@
/* Registers:
- * r3: kvm_run pointer
- * r4: vcpu pointer
+ * r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
stwu r1, -HOST_STACK_SIZE(r1)
- stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+ stw r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */
/* Save host state to stack. */
- stw r3, HOST_RUN(r1)
+ mr r4, r3
mflr r3
stw r3, HOST_STACK_LR(r1)
mfcr r5
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index c577ba4..8262c14 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -434,9 +434,10 @@
#endif
/* Switch to kernel stack and jump to handler. */
- PPC_LL r3, HOST_RUN(r1)
+ mr r3, r4
mr r5, r14 /* intno */
mr r14, r4 /* Save vcpu pointer. */
+ mr r4, r5
bl kvmppc_handle_exit
/* Restore vcpu pointer and the nonvolatiles we used. */
@@ -525,15 +526,14 @@
blr
/* Registers:
- * r3: kvm_run pointer
- * r4: vcpu pointer
+ * r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
stwu r1, -HOST_STACK_SIZE(r1)
- PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+ PPC_STL r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */
/* Save host state to stack. */
- PPC_STL r3, HOST_RUN(r1)
+ mr r4, r3
mflr r3
mfcr r5
PPC_STL r3, HOST_STACK_LR(r1)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 00649ca..7e8b690 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -433,31 +433,16 @@
return r;
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_e500(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
- struct kvm_vcpu *vcpu;
int err;
- BUILD_BUG_ON_MSG(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0,
- "struct kvm_vcpu must be at offset 0 for arch usercopy region");
+ BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0);
+ vcpu_e500 = to_e500(vcpu);
- vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu_e500) {
- err = -ENOMEM;
- goto out;
- }
-
- vcpu = &vcpu_e500->vcpu;
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
-
- if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) {
- err = -ENOMEM;
- goto uninit_vcpu;
- }
+ if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+ return -ENOMEM;
err = kvmppc_e500_tlb_init(vcpu_e500);
if (err)
@@ -469,18 +454,13 @@
goto uninit_tlb;
}
- return vcpu;
+ return 0;
uninit_tlb:
kvmppc_e500_tlb_uninit(vcpu_e500);
uninit_id:
kvmppc_e500_id_table_free(vcpu_e500);
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
-out:
- return ERR_PTR(err);
+ return err;
}
static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
@@ -490,8 +470,6 @@
free_page((unsigned long)vcpu->arch.shared);
kvmppc_e500_tlb_uninit(vcpu_e500);
kvmppc_e500_id_table_free(vcpu_e500);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
static int kvmppc_core_init_vm_e500(struct kvm *kvm)
@@ -512,7 +490,6 @@
.vcpu_put = kvmppc_core_vcpu_put_e500,
.vcpu_create = kvmppc_core_vcpu_create_e500,
.vcpu_free = kvmppc_core_vcpu_free_e500,
- .mmu_destroy = kvmppc_mmu_destroy_e500,
.init_vm = kvmppc_core_init_vm_e500,
.destroy_vm = kvmppc_core_destroy_vm_e500,
.emulate_op = kvmppc_core_emulate_op_e500,
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 3d0d3ec..64eb833 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -83,16 +83,16 @@
}
#endif
-static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_e500_emul_ehpriv(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
switch (get_oc(inst)) {
case EHPRIV_OC_DEBUG:
- run->exit_reason = KVM_EXIT_DEBUG;
- run->debug.arch.address = vcpu->arch.regs.nip;
- run->debug.arch.status = 0;
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+ vcpu->run->debug.arch.address = vcpu->arch.regs.nip;
+ vcpu->run->debug.arch.status = 0;
kvmppc_account_exit(vcpu, DEBUG_EXITS);
emulated = EMULATE_EXIT_USER;
*advance = 0;
@@ -125,7 +125,7 @@
return EMULATE_FAIL;
}
-int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_core_emulate_op_e500(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
@@ -182,8 +182,7 @@
break;
case XOP_EHPRIV:
- emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst,
- advance);
+ emulated = kvmppc_e500_emul_ehpriv(vcpu, inst, advance);
break;
default:
@@ -197,7 +196,7 @@
}
if (emulated == EMULATE_FAIL)
- emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+ emulated = kvmppc_booke_emulate_op(vcpu, inst, advance);
return emulated;
}
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 2d910b8..e131fbe 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -533,10 +533,6 @@
return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
}
-void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu)
-{
-}
-
/*****************************************/
static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 7154bd4..ed0c9c4 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -355,9 +355,9 @@
if (tlbsel == 1) {
struct vm_area_struct *vma;
- down_read(¤t->mm->mmap_sem);
+ mmap_read_lock(kvm->mm);
- vma = find_vma(current->mm, hva);
+ vma = find_vma(kvm->mm, hva);
if (vma && hva >= vma->vm_start &&
(vma->vm_flags & VM_PFNMAP)) {
/*
@@ -422,7 +422,7 @@
break;
}
} else if (vma && hva >= vma->vm_start &&
- (vma->vm_flags & VM_HUGETLB)) {
+ is_vm_hugetlb_page(vma)) {
unsigned long psize = vma_kernel_pagesize(vma);
tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
@@ -441,7 +441,7 @@
tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
}
- up_read(¤t->mm->mmap_sem);
+ mmap_read_unlock(kvm->mm);
}
if (likely(!pfnmap)) {
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 318e65c..1c189b5 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -301,30 +301,20 @@
return r;
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_e500mc(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
- struct kvm_vcpu *vcpu;
int err;
- vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu_e500) {
- err = -ENOMEM;
- goto out;
- }
- vcpu = &vcpu_e500->vcpu;
+ BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0);
+ vcpu_e500 = to_e500(vcpu);
/* Invalid PIR value -- this LPID dosn't have valid state on any cpu */
vcpu->arch.oldpir = 0xffffffff;
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
-
err = kvmppc_e500_tlb_init(vcpu_e500);
if (err)
- goto uninit_vcpu;
+ return err;
vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
if (!vcpu->arch.shared) {
@@ -332,17 +322,11 @@
goto uninit_tlb;
}
- return vcpu;
+ return 0;
uninit_tlb:
kvmppc_e500_tlb_uninit(vcpu_e500);
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
-
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
-out:
- return ERR_PTR(err);
+ return err;
}
static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
@@ -351,8 +335,6 @@
free_page((unsigned long)vcpu->arch.shared);
kvmppc_e500_tlb_uninit(vcpu_e500);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
@@ -394,7 +376,6 @@
.vcpu_put = kvmppc_core_vcpu_put_e500mc,
.vcpu_create = kvmppc_core_vcpu_create_e500mc,
.vcpu_free = kvmppc_core_vcpu_free_e500mc,
- .mmu_destroy = kvmppc_mmu_destroy_e500,
.init_vm = kvmppc_core_init_vm_e500mc,
.destroy_vm = kvmppc_core_destroy_vm_e500mc,
.emulate_op = kvmppc_core_emulate_op_e500,
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 6fca38c..ee1147c 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -191,7 +191,7 @@
/* XXX Should probably auto-generate instruction decoding for a particular core
* from opcode tables in the future. */
-int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
+int kvmppc_emulate_instruction(struct kvm_vcpu *vcpu)
{
u32 inst;
int rs, rt, sprn;
@@ -270,9 +270,9 @@
* these are illegal instructions.
*/
if (inst == KVMPPC_INST_SW_BREAKPOINT) {
- run->exit_reason = KVM_EXIT_DEBUG;
- run->debug.arch.status = 0;
- run->debug.arch.address = kvmppc_get_pc(vcpu);
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+ vcpu->run->debug.arch.status = 0;
+ vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu);
emulated = EMULATE_EXIT_USER;
advance = 0;
} else
@@ -285,7 +285,7 @@
}
if (emulated == EMULATE_FAIL) {
- emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst,
+ emulated = vcpu->kvm->arch.kvm_ops->emulate_op(vcpu, inst,
&advance);
if (emulated == EMULATE_AGAIN) {
advance = 0;
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 1139bc5..48272a9 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -71,7 +71,6 @@
*/
int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu->run;
u32 inst;
enum emulation_result emulated = EMULATE_FAIL;
int advance = 1;
@@ -95,7 +94,7 @@
emulated = EMULATE_FAIL;
vcpu->arch.regs.msr = vcpu->arch.shared->msr;
- if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) {
+ if (analyse_instr(&op, &vcpu->arch.regs, ppc_inst(inst)) == 0) {
int type = op.type & INSTR_TYPE_MASK;
int size = GETSIZE(op.type);
@@ -104,10 +103,10 @@
int instr_byte_swap = op.type & BYTEREV;
if (op.type & SIGNEXT)
- emulated = kvmppc_handle_loads(run, vcpu,
+ emulated = kvmppc_handle_loads(vcpu,
op.reg, size, !instr_byte_swap);
else
- emulated = kvmppc_handle_load(run, vcpu,
+ emulated = kvmppc_handle_load(vcpu,
op.reg, size, !instr_byte_swap);
if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
@@ -124,10 +123,10 @@
vcpu->arch.mmio_sp64_extend = 1;
if (op.type & SIGNEXT)
- emulated = kvmppc_handle_loads(run, vcpu,
+ emulated = kvmppc_handle_loads(vcpu,
KVM_MMIO_REG_FPR|op.reg, size, 1);
else
- emulated = kvmppc_handle_load(run, vcpu,
+ emulated = kvmppc_handle_load(vcpu,
KVM_MMIO_REG_FPR|op.reg, size, 1);
if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
@@ -164,12 +163,12 @@
if (size == 16) {
vcpu->arch.mmio_vmx_copy_nums = 2;
- emulated = kvmppc_handle_vmx_load(run,
- vcpu, KVM_MMIO_REG_VMX|op.reg,
+ emulated = kvmppc_handle_vmx_load(vcpu,
+ KVM_MMIO_REG_VMX|op.reg,
8, 1);
} else {
vcpu->arch.mmio_vmx_copy_nums = 1;
- emulated = kvmppc_handle_vmx_load(run, vcpu,
+ emulated = kvmppc_handle_vmx_load(vcpu,
KVM_MMIO_REG_VMX|op.reg,
size, 1);
}
@@ -217,7 +216,7 @@
io_size_each = op.element_size;
}
- emulated = kvmppc_handle_vsx_load(run, vcpu,
+ emulated = kvmppc_handle_vsx_load(vcpu,
KVM_MMIO_REG_VSX|op.reg, io_size_each,
1, op.type & SIGNEXT);
break;
@@ -227,8 +226,7 @@
/* if need byte reverse, op.val has been reversed by
* analyse_instr().
*/
- emulated = kvmppc_handle_store(run, vcpu, op.val,
- size, 1);
+ emulated = kvmppc_handle_store(vcpu, op.val, size, 1);
if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
@@ -250,7 +248,7 @@
if (op.type & FPCONV)
vcpu->arch.mmio_sp64_extend = 1;
- emulated = kvmppc_handle_store(run, vcpu,
+ emulated = kvmppc_handle_store(vcpu,
VCPU_FPR(vcpu, op.reg), size, 1);
if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
@@ -290,12 +288,12 @@
if (size == 16) {
vcpu->arch.mmio_vmx_copy_nums = 2;
- emulated = kvmppc_handle_vmx_store(run,
- vcpu, op.reg, 8, 1);
+ emulated = kvmppc_handle_vmx_store(vcpu,
+ op.reg, 8, 1);
} else {
vcpu->arch.mmio_vmx_copy_nums = 1;
- emulated = kvmppc_handle_vmx_store(run,
- vcpu, op.reg, size, 1);
+ emulated = kvmppc_handle_vmx_store(vcpu,
+ op.reg, size, 1);
}
break;
@@ -338,7 +336,7 @@
io_size_each = op.element_size;
}
- emulated = kvmppc_handle_vsx_store(run, vcpu,
+ emulated = kvmppc_handle_vsx_store(vcpu,
op.reg, io_size_each, 1);
break;
}
diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S
index 3dfae0c..315c949 100644
--- a/arch/powerpc/kvm/fpu.S
+++ b/arch/powerpc/kvm/fpu.S
@@ -5,10 +5,10 @@
* Copyright (C) 2010 Alexander Graf (agraf@suse.de)
*/
+#include <linux/pgtable.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index fe312c1..23e9c2b 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -32,7 +32,6 @@
#include <linux/uaccess.h>
#include <asm/mpic.h>
#include <asm/kvm_para.h>
-#include <asm/kvm_host.h>
#include <asm/kvm_ppc.h>
#include <kvm/iodev.h>
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8dd4d2b..543db91 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -31,6 +31,7 @@
#include <asm/hvcall.h>
#include <asm/plpar_wrappers.h>
#endif
+#include <asm/ultravisor.h>
#include "timing.h"
#include "irq.h"
@@ -278,7 +279,7 @@
}
EXPORT_SYMBOL_GPL(kvmppc_sanity_check);
-int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
+int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu)
{
enum emulation_result er;
int r;
@@ -294,7 +295,7 @@
r = RESUME_GUEST;
break;
case EMULATE_DO_MMIO:
- run->exit_reason = KVM_EXIT_MMIO;
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
/* We must reload nonvolatiles because "update" load/store
* instructions modify register state. */
/* Future optimization: only reload non-volatiles if they were
@@ -402,7 +403,10 @@
return EMULATE_DONE;
}
- if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size))
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ if (rc)
return EMULATE_DO_MMIO;
return EMULATE_DONE;
@@ -414,12 +418,12 @@
return 0;
}
-int kvm_arch_hardware_setup(void)
+int kvm_arch_hardware_setup(void *opaque)
{
return 0;
}
-int kvm_arch_check_processor_compat(void)
+int kvm_arch_check_processor_compat(void *opaque)
{
return kvmppc_core_check_processor_compat();
}
@@ -473,7 +477,7 @@
#endif
kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_arch_vcpu_free(vcpu);
+ kvm_vcpu_destroy(vcpu);
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
@@ -520,8 +524,10 @@
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
+ case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
@@ -667,6 +673,12 @@
(hv_enabled && cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST));
break;
#endif
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ case KVM_CAP_PPC_SECURE_GUEST:
+ r = hv_enabled && kvmppc_hv_ops->enable_svm &&
+ !kvmppc_hv_ops->enable_svm(NULL);
+ break;
+#endif
default:
r = 0;
break;
@@ -681,16 +693,9 @@
return -EINVAL;
}
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
- kvmppc_core_free_memslot(kvm, free, dont);
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return kvmppc_core_create_memslot(kvm, slot, npages);
+ kvmppc_core_free_memslot(kvm, slot);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -698,12 +703,12 @@
const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
- return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
+ return kvmppc_core_prepare_memory_region(kvm, memslot, mem, change);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
@@ -716,22 +721,54 @@
kvmppc_core_flush_memslot(kvm, slot);
}
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+ return 0;
+}
+
+static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
- vcpu = kvmppc_core_vcpu_create(kvm, id);
- if (!IS_ERR(vcpu)) {
- vcpu->arch.wqp = &vcpu->wq;
- kvmppc_create_vcpu_debugfs(vcpu, id);
- }
- return vcpu;
+
+ vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
+ kvmppc_decrementer_func(vcpu);
+
+ return HRTIMER_NORESTART;
+}
+
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+ int err;
+
+ hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
+ vcpu->arch.dec_expires = get_tb();
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ mutex_init(&vcpu->arch.exit_timing_lock);
+#endif
+ err = kvmppc_subarch_vcpu_init(vcpu);
+ if (err)
+ return err;
+
+ err = kvmppc_core_vcpu_create(vcpu);
+ if (err)
+ goto out_vcpu_uninit;
+
+ vcpu->arch.waitp = &vcpu->wait;
+ kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
+ return 0;
+
+out_vcpu_uninit:
+ kvmppc_subarch_vcpu_uninit(vcpu);
+ return err;
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
}
-void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
/* Make sure we're not using the vcpu anymore */
hrtimer_cancel(&vcpu->arch.dec_timer);
@@ -754,11 +791,8 @@
}
kvmppc_core_vcpu_free(vcpu);
-}
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
- kvm_arch_vcpu_free(vcpu);
+ kvmppc_subarch_vcpu_uninit(vcpu);
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -766,37 +800,6 @@
return kvmppc_core_pending_dec(vcpu);
}
-static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
-{
- struct kvm_vcpu *vcpu;
-
- vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
- kvmppc_decrementer_func(vcpu);
-
- return HRTIMER_NORESTART;
-}
-
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
- int ret;
-
- hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
- vcpu->arch.dec_expires = get_tb();
-
-#ifdef CONFIG_KVM_EXIT_TIMING
- mutex_init(&vcpu->arch.exit_timing_lock);
-#endif
- ret = kvmppc_subarch_vcpu_init(vcpu);
- return ret;
-}
-
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
- kvmppc_mmu_destroy(vcpu);
- kvmppc_subarch_vcpu_uninit(vcpu);
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_BOOKE
@@ -1107,10 +1110,10 @@
#define dp_to_sp(x) (x)
#endif /* CONFIG_PPC_FPU */
-static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
- struct kvm_run *run)
+static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu)
{
- u64 uninitialized_var(gpr);
+ struct kvm_run *run = vcpu->run;
+ u64 gpr;
if (run->mmio.len > sizeof(gpr)) {
printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
@@ -1219,10 +1222,11 @@
}
}
-static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int __kvmppc_handle_load(struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes,
int is_default_endian, int sign_extend)
{
+ struct kvm_run *run = vcpu->run;
int idx, ret;
bool host_swabbed;
@@ -1256,7 +1260,7 @@
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (!ret) {
- kvmppc_complete_mmio_load(vcpu, run);
+ kvmppc_complete_mmio_load(vcpu);
vcpu->mmio_needed = 0;
return EMULATE_DONE;
}
@@ -1264,24 +1268,24 @@
return EMULATE_DO_MMIO;
}
-int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_load(struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes,
int is_default_endian)
{
- return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 0);
+ return __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 0);
}
EXPORT_SYMBOL_GPL(kvmppc_handle_load);
/* Same as above, but sign extends */
-int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_loads(struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes,
int is_default_endian)
{
- return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1);
+ return __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 1);
}
#ifdef CONFIG_VSX
-int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_vsx_load(struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes,
int is_default_endian, int mmio_sign_extend)
{
@@ -1292,13 +1296,13 @@
return EMULATE_FAIL;
while (vcpu->arch.mmio_vsx_copy_nums) {
- emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
+ emulated = __kvmppc_handle_load(vcpu, rt, bytes,
is_default_endian, mmio_sign_extend);
if (emulated != EMULATE_DONE)
break;
- vcpu->arch.paddr_accessed += run->mmio.len;
+ vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
vcpu->arch.mmio_vsx_copy_nums--;
vcpu->arch.mmio_vsx_offset++;
@@ -1307,9 +1311,10 @@
}
#endif /* CONFIG_VSX */
-int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_store(struct kvm_vcpu *vcpu,
u64 val, unsigned int bytes, int is_default_endian)
{
+ struct kvm_run *run = vcpu->run;
void *data = run->mmio.data;
int idx, ret;
bool host_swabbed;
@@ -1423,7 +1428,7 @@
return result;
}
-int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_vsx_store(struct kvm_vcpu *vcpu,
int rs, unsigned int bytes, int is_default_endian)
{
u64 val;
@@ -1439,13 +1444,13 @@
if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
return EMULATE_FAIL;
- emulated = kvmppc_handle_store(run, vcpu,
+ emulated = kvmppc_handle_store(vcpu,
val, bytes, is_default_endian);
if (emulated != EMULATE_DONE)
break;
- vcpu->arch.paddr_accessed += run->mmio.len;
+ vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
vcpu->arch.mmio_vsx_copy_nums--;
vcpu->arch.mmio_vsx_offset++;
@@ -1454,19 +1459,19 @@
return emulated;
}
-static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
- struct kvm_run *run)
+static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *run = vcpu->run;
enum emulation_result emulated = EMULATE_FAIL;
int r;
vcpu->arch.paddr_accessed += run->mmio.len;
if (!vcpu->mmio_is_write) {
- emulated = kvmppc_handle_vsx_load(run, vcpu, vcpu->arch.io_gpr,
+ emulated = kvmppc_handle_vsx_load(vcpu, vcpu->arch.io_gpr,
run->mmio.len, 1, vcpu->arch.mmio_sign_extend);
} else {
- emulated = kvmppc_handle_vsx_store(run, vcpu,
+ emulated = kvmppc_handle_vsx_store(vcpu,
vcpu->arch.io_gpr, run->mmio.len, 1);
}
@@ -1490,7 +1495,7 @@
#endif /* CONFIG_VSX */
#ifdef CONFIG_ALTIVEC
-int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes, int is_default_endian)
{
enum emulation_result emulated = EMULATE_DONE;
@@ -1499,13 +1504,13 @@
return EMULATE_FAIL;
while (vcpu->arch.mmio_vmx_copy_nums) {
- emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
+ emulated = __kvmppc_handle_load(vcpu, rt, bytes,
is_default_endian, 0);
if (emulated != EMULATE_DONE)
break;
- vcpu->arch.paddr_accessed += run->mmio.len;
+ vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
vcpu->arch.mmio_vmx_copy_nums--;
vcpu->arch.mmio_vmx_offset++;
}
@@ -1585,7 +1590,7 @@
return result;
}
-int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu,
unsigned int rs, unsigned int bytes, int is_default_endian)
{
u64 val = 0;
@@ -1620,12 +1625,12 @@
return EMULATE_FAIL;
}
- emulated = kvmppc_handle_store(run, vcpu, val, bytes,
+ emulated = kvmppc_handle_store(vcpu, val, bytes,
is_default_endian);
if (emulated != EMULATE_DONE)
break;
- vcpu->arch.paddr_accessed += run->mmio.len;
+ vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
vcpu->arch.mmio_vmx_copy_nums--;
vcpu->arch.mmio_vmx_offset++;
}
@@ -1633,19 +1638,19 @@
return emulated;
}
-static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu,
- struct kvm_run *run)
+static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *run = vcpu->run;
enum emulation_result emulated = EMULATE_FAIL;
int r;
vcpu->arch.paddr_accessed += run->mmio.len;
if (!vcpu->mmio_is_write) {
- emulated = kvmppc_handle_vmx_load(run, vcpu,
+ emulated = kvmppc_handle_vmx_load(vcpu,
vcpu->arch.io_gpr, run->mmio.len, 1);
} else {
- emulated = kvmppc_handle_vmx_store(run, vcpu,
+ emulated = kvmppc_handle_vmx_store(vcpu,
vcpu->arch.io_gpr, run->mmio.len, 1);
}
@@ -1765,8 +1770,9 @@
return r;
}
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *run = vcpu->run;
int r;
vcpu_load(vcpu);
@@ -1774,7 +1780,7 @@
if (vcpu->mmio_needed) {
vcpu->mmio_needed = 0;
if (!vcpu->mmio_is_write)
- kvmppc_complete_mmio_load(vcpu, run);
+ kvmppc_complete_mmio_load(vcpu);
#ifdef CONFIG_VSX
if (vcpu->arch.mmio_vsx_copy_nums > 0) {
vcpu->arch.mmio_vsx_copy_nums--;
@@ -1782,7 +1788,7 @@
}
if (vcpu->arch.mmio_vsx_copy_nums > 0) {
- r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run);
+ r = kvmppc_emulate_mmio_vsx_loadstore(vcpu);
if (r == RESUME_HOST) {
vcpu->mmio_needed = 1;
goto out;
@@ -1796,7 +1802,7 @@
}
if (vcpu->arch.mmio_vmx_copy_nums > 0) {
- r = kvmppc_emulate_mmio_vmx_loadstore(vcpu, run);
+ r = kvmppc_emulate_mmio_vmx_loadstore(vcpu);
if (r == RESUME_HOST) {
vcpu->mmio_needed = 1;
goto out;
@@ -1829,7 +1835,7 @@
if (run->immediate_exit)
r = -EINTR;
else
- r = kvmppc_vcpu_run(run, vcpu);
+ r = kvmppc_vcpu_run(vcpu);
kvm_sigset_deactivate(vcpu);
@@ -2174,6 +2180,14 @@
r = kvm->arch.kvm_ops->enable_nested(kvm);
break;
#endif
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ case KVM_CAP_PPC_SECURE_GUEST:
+ r = -EINVAL;
+ if (!is_kvmppc_hv_enabled(kvm) || !kvm->arch.kvm_ops->enable_svm)
+ break;
+ r = kvm->arch.kvm_ops->enable_svm(kvm);
+ break;
+#endif
default:
r = -EINVAL;
break;
@@ -2411,6 +2425,16 @@
r = -EFAULT;
break;
}
+ case KVM_PPC_SVM_OFF: {
+ struct kvm *kvm = filp->private_data;
+
+ r = 0;
+ if (!kvm->arch.kvm_ops->svm_off)
+ goto out;
+
+ r = kvm->arch.kvm_ops->svm_off(kvm);
+ break;
+ }
default: {
struct kvm *kvm = filp->private_data;
r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index bfe4f10..ba56a5c 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -211,23 +211,14 @@
snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
current->pid, id);
- debugfs_file = debugfs_create_file(dbg_fname, 0666,
- kvm_debugfs_dir, vcpu,
- &kvmppc_exit_timing_fops);
-
- if (!debugfs_file) {
- printk(KERN_ERR"%s: error creating debugfs file %s\n",
- __func__, dbg_fname);
- return;
- }
+ debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir,
+ vcpu, &kvmppc_exit_timing_fops);
vcpu->arch.debugfs_exit_timing = debugfs_file;
}
void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.debugfs_exit_timing) {
- debugfs_remove(vcpu->arch.debugfs_exit_timing);
- vcpu->arch.debugfs_exit_timing = NULL;
- }
+ debugfs_remove(vcpu->arch.debugfs_exit_timing);
+ vcpu->arch.debugfs_exit_timing = NULL;
}
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index ace65f9..feef788 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -10,7 +10,6 @@
#define __POWERPC_KVM_EXITTIMING_H__
#include <linux/kvm_host.h>
-#include <asm/kvm_host.h>
#ifdef CONFIG_KVM_EXIT_TIMING
void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index 8a1e3b0..830a126 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -89,7 +89,7 @@
{H_CREATE_RPT, "H_CREATE_RPT"}, \
{H_REMOVE_RPT, "H_REMOVE_RPT"}, \
{H_REGISTER_RPAGES, "H_REGISTER_RPAGES"}, \
- {H_DISABLE_AND_GETC, "H_DISABLE_AND_GETC"}, \
+ {H_DISABLE_AND_GET, "H_DISABLE_AND_GET"}, \
{H_ERROR_DATA, "H_ERROR_DATA"}, \
{H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \
{H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \
@@ -472,9 +472,9 @@
);
TRACE_EVENT(kvmppc_run_vcpu_exit,
- TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run),
+ TP_PROTO(struct kvm_vcpu *vcpu),
- TP_ARGS(vcpu, run),
+ TP_ARGS(vcpu),
TP_STRUCT__entry(
__field(int, vcpu_id)
@@ -484,7 +484,7 @@
TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
- __entry->exit = run->exit_reason;
+ __entry->exit = vcpu->run->exit_reason;
__entry->ret = vcpu->arch.ret;
),