Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index ec2547c..1ff971f 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -867,7 +867,8 @@
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change);
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 2d415c3..9d73448 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -38,7 +38,8 @@
/* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */
if (kvmhv_on_pseries())
return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr,
- __pa(to), __pa(from), n);
+ (to != NULL) ? __pa(to): 0,
+ (from != NULL) ? __pa(from): 0, n);
quadrant = 1;
if (!pid)
@@ -353,7 +354,13 @@
static pte_t *kvmppc_pte_alloc(void)
{
- return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
+ pte_t *pte;
+
+ pte = kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
+ /* pmd_populate() will only reference _pa(pte). */
+ kmemleak_ignore(pte);
+
+ return pte;
}
static void kvmppc_pte_free(pte_t *ptep)
@@ -363,7 +370,13 @@
static pmd_t *kvmppc_pmd_alloc(void)
{
- return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL);
+ pmd_t *pmd;
+
+ pmd = kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL);
+ /* pud_populate() will only reference _pa(pmd). */
+ kmemleak_ignore(pmd);
+
+ return pmd;
}
static void kvmppc_pmd_free(pmd_t *pmdp)
@@ -1091,6 +1104,11 @@
kvm->arch.lpid);
gpa += PAGE_SIZE;
}
+ /*
+ * Increase the mmu notifier sequence number to prevent any page
+ * fault that read the memslot earlier from writing a PTE.
+ */
+ kvm->mmu_notifier_seq++;
spin_unlock(&kvm->mmu_lock);
}
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 5834db0..03b9474 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -74,6 +74,7 @@
struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
struct iommu_table_group *table_group = NULL;
+ rcu_read_lock();
list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
table_group = iommu_group_get_iommudata(grp);
@@ -88,7 +89,9 @@
kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
}
}
+ cond_resched_rcu();
}
+ rcu_read_unlock();
}
extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
@@ -106,12 +109,14 @@
if (!f.file)
return -EBADF;
+ rcu_read_lock();
list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
if (stt == f.file->private_data) {
found = true;
break;
}
}
+ rcu_read_unlock();
fdput(f);
@@ -144,6 +149,7 @@
if (!tbl)
return -EINVAL;
+ rcu_read_lock();
list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
if (tbl != stit->tbl)
continue;
@@ -151,14 +157,17 @@
if (!kref_get_unless_zero(&stit->kref)) {
/* stit is being destroyed */
iommu_tce_table_put(tbl);
+ rcu_read_unlock();
return -ENOTTY;
}
/*
* The table is already known to this KVM, we just increased
* its KVM reference counter and can return.
*/
+ rcu_read_unlock();
return 0;
}
+ rcu_read_unlock();
stit = kzalloc(sizeof(*stit), GFP_KERNEL);
if (!stit) {
@@ -364,18 +373,19 @@
if (kvmppc_tce_to_ua(stt->kvm, tce, &ua))
return H_TOO_HARD;
+ rcu_read_lock();
list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
unsigned long hpa = 0;
struct mm_iommu_table_group_mem_t *mem;
long shift = stit->tbl->it_page_shift;
mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
- if (!mem)
+ if (!mem || mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) {
+ rcu_read_unlock();
return H_TOO_HARD;
-
- if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa))
- return H_TOO_HARD;
+ }
}
+ rcu_read_unlock();
return H_SUCCESS;
}
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index ab6eeb8..35fd67b 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -177,10 +177,13 @@
idx -= stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
/*
- * page must not be NULL in real mode,
- * kvmppc_rm_ioba_validate() must have taken care of this.
+ * kvmppc_rm_ioba_validate() allows pages not be allocated if TCE is
+ * being cleared, otherwise it returns H_TOO_HARD and we skip this.
*/
- WARN_ON_ONCE_RM(!page);
+ if (!page) {
+ WARN_ON_ONCE_RM(tce != 0);
+ return;
+ }
tbl = kvmppc_page_address(page);
tbl[idx % TCES_PER_PAGE] = tce;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 709cf1f..6c99ccc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -58,6 +58,7 @@
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
+#include <asm/pmc.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
@@ -2306,8 +2307,10 @@
HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
if (cpu_has_feature(CPU_FTR_HVMODE)) {
vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
vcpu->arch.hfscr |= HFSCR_TM;
+#endif
}
if (cpu_has_feature(CPU_FTR_TM_COMP))
vcpu->arch.hfscr |= HFSCR_TM;
@@ -2354,7 +2357,7 @@
mutex_unlock(&kvm->lock);
if (!vcore)
- goto free_vcpu;
+ goto uninit_vcpu;
spin_lock(&vcore->lock);
++vcore->num_threads;
@@ -2371,6 +2374,8 @@
return vcpu;
+uninit_vcpu:
+ kvm_vcpu_uninit(vcpu);
free_vcpu:
kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
@@ -2534,7 +2539,7 @@
cpumask_t *cpu_in_guest;
int i;
- cpu = cpu_first_thread_sibling(cpu);
+ cpu = cpu_first_tlb_thread_sibling(cpu);
if (nested) {
cpumask_set_cpu(cpu, &nested->need_tlb_flush);
cpu_in_guest = &nested->cpu_in_guest;
@@ -2548,9 +2553,10 @@
* the other side is the first smp_mb() in kvmppc_run_core().
*/
smp_mb();
- for (i = 0; i < threads_per_core; ++i)
- if (cpumask_test_cpu(cpu + i, cpu_in_guest))
- smp_call_function_single(cpu + i, do_nothing, NULL, 1);
+ for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
+ i += cpu_tlb_thread_sibling_step())
+ if (cpumask_test_cpu(i, cpu_in_guest))
+ smp_call_function_single(i, do_nothing, NULL, 1);
}
static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
@@ -2581,8 +2587,8 @@
*/
if (prev_cpu != pcpu) {
if (prev_cpu >= 0 &&
- cpu_first_thread_sibling(prev_cpu) !=
- cpu_first_thread_sibling(pcpu))
+ cpu_first_tlb_thread_sibling(prev_cpu) !=
+ cpu_first_tlb_thread_sibling(pcpu))
radix_flush_cpu(kvm, prev_cpu, vcpu);
if (nested)
nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
@@ -3554,6 +3560,18 @@
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+#ifdef CONFIG_PPC_PSERIES
+ if (kvmhv_on_pseries()) {
+ barrier();
+ if (vcpu->arch.vpa.pinned_addr) {
+ struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+ get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
+ } else {
+ get_lppaca()->pmcregs_in_use = 1;
+ }
+ barrier();
+ }
+#endif
kvmhv_load_guest_pmu(vcpu);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
@@ -3621,6 +3639,7 @@
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
kvmppc_nested_cede(vcpu);
+ kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
} else {
@@ -3635,7 +3654,10 @@
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
+ /* Save guest CTRL register, set runlatch to 1 */
vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);
vcpu->arch.iamr = mfspr(SPRN_IAMR);
vcpu->arch.pspb = mfspr(SPRN_PSPB);
@@ -3684,6 +3706,13 @@
save_pmu |= nesting_enabled(vcpu->kvm);
kvmhv_save_guest_pmu(vcpu, save_pmu);
+#ifdef CONFIG_PPC_PSERIES
+ if (kvmhv_on_pseries()) {
+ barrier();
+ get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
+ barrier();
+ }
+#endif
vc->entry_exit_map = 0x101;
vc->in_guest = 0;
@@ -5188,6 +5217,12 @@
case KVM_PPC_ALLOCATE_HTAB: {
u32 htab_order;
+ /* If we're a nested hypervisor, we currently only support radix */
+ if (kvmhv_on_pseries()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+
r = -EFAULT;
if (get_user(htab_order, (u32 __user *)argp))
break;
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7c19096..4a91b54 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -847,7 +847,7 @@
* Thus we make all 4 threads use the same bit.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
- pcpu = cpu_first_thread_sibling(pcpu);
+ pcpu = cpu_first_tlb_thread_sibling(pcpu);
if (nested)
need_tlb_flush = &nested->need_tlb_flush;
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index cdf30c6..9906d20 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -51,7 +51,8 @@
hr->ppr = vcpu->arch.ppr;
}
-static void byteswap_pt_regs(struct pt_regs *regs)
+/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */
+static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
{
unsigned long *addr = (unsigned long *) regs;
@@ -231,6 +232,9 @@
if (vcpu->kvm->arch.l1_ptcr == 0)
return H_NOT_AVAILABLE;
+ if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
+ return H_BAD_MODE;
+
/* copy parameters in */
hv_ptr = kvmppc_get_gpr(vcpu, 4);
err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
@@ -252,6 +256,23 @@
if (l2_hv.vcpu_token >= NR_CPUS)
return H_PARAMETER;
+ /*
+ * L1 must have set up a suspended state to enter the L2 in a
+ * transactional state, and only in that case. These have to be
+ * filtered out here to prevent causing a TM Bad Thing in the
+ * host HRFID. We could synthesize a TM Bad Thing back to the L1
+ * here but there doesn't seem like much point.
+ */
+ if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) {
+ if (!MSR_TM_ACTIVE(l2_regs.msr))
+ return H_BAD_MODE;
+ } else {
+ if (l2_regs.msr & MSR_TS_MASK)
+ return H_BAD_MODE;
+ if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK))
+ return H_BAD_MODE;
+ }
+
/* translate lpid */
l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
if (!l2)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 2203054..9bf3be4 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -67,7 +67,7 @@
* so use the bit for the first thread to represent the core.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
- cpu = cpu_first_thread_sibling(cpu);
+ cpu = cpu_first_tlb_thread_sibling(cpu);
cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 0496e66..feaf6ca 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1117,7 +1117,7 @@
ld r7, VCPU_GPR(R7)(r4)
bne ret_to_ultra
- lwz r0, VCPU_CR(r4)
+ ld r0, VCPU_CR(r4)
mtcr r0
ld r0, VCPU_GPR(R0)(r4)
@@ -1137,7 +1137,7 @@
* R3 = UV_RETURN
*/
ret_to_ultra:
- lwz r0, VCPU_CR(r4)
+ ld r0, VCPU_CR(r4)
mtcr r0
ld r0, VCPU_GPR(R3)(r4)
@@ -3137,7 +3137,7 @@
/* The following code handles the fake_suspend = 1 case */
mflr r0
std r0, PPC_LR_STKOFF(r1)
- stdu r1, -PPC_MIN_STKFRM(r1)
+ stdu r1, -TM_FRAME_SIZE(r1)
/* Turn on TM. */
mfmsr r8
@@ -3152,10 +3152,42 @@
END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
nop
+ /*
+ * It's possible that treclaim. may modify registers, if we have lost
+ * track of fake-suspend state in the guest due to it using rfscv.
+ * Save and restore registers in case this occurs.
+ */
+ mfspr r3, SPRN_DSCR
+ mfspr r4, SPRN_XER
+ mfspr r5, SPRN_AMR
+ /* SPRN_TAR would need to be saved here if the kernel ever used it */
+ mfcr r12
+ SAVE_NVGPRS(r1)
+ SAVE_GPR(2, r1)
+ SAVE_GPR(3, r1)
+ SAVE_GPR(4, r1)
+ SAVE_GPR(5, r1)
+ stw r12, 8(r1)
+ std r1, HSTATE_HOST_R1(r13)
+
/* We have to treclaim here because that's the only way to do S->N */
li r3, TM_CAUSE_KVM_RESCHED
TRECLAIM(R3)
+ GET_PACA(r13)
+ ld r1, HSTATE_HOST_R1(r13)
+ REST_GPR(2, r1)
+ REST_GPR(3, r1)
+ REST_GPR(4, r1)
+ REST_GPR(5, r1)
+ lwz r12, 8(r1)
+ REST_NVGPRS(r1)
+ mtspr SPRN_DSCR, r3
+ mtspr SPRN_XER, r4
+ mtspr SPRN_AMR, r5
+ mtcr r12
+ HMT_MEDIUM
+
/*
* We were in fake suspend, so we are not going to save the
* register state as the guest checkpointed state (since
@@ -3183,7 +3215,7 @@
std r5, VCPU_TFHAR(r9)
std r6, VCPU_TFIAR(r9)
- addi r1, r1, PPC_MIN_STKFRM
+ addi r1, r1, TM_FRAME_SIZE
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
blr
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index 0db9374..cc90b8b 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -3,6 +3,8 @@
* Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kvm_host.h>
#include <asm/kvm_ppc.h>
@@ -44,7 +46,18 @@
u64 newmsr, bescr;
int ra, rs;
- switch (instr & 0xfc0007ff) {
+ /*
+ * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
+ * in these instructions, so masking bit 31 out doesn't change these
+ * instructions. For treclaim., tsr., and trechkpt. instructions if bit
+ * 31 = 0 then they are per ISA invalid forms, however P9 UM, in section
+ * 4.6.10 Book II Invalid Forms, informs specifically that ignoring bit
+ * 31 is an acceptable way to handle these invalid forms that have
+ * bit 31 = 0. Moreover, for emulation purposes both forms (w/ and wo/
+ * bit 31 set) can generate a softpatch interrupt. Hence both forms
+ * are handled below for these instructions so they behave the same way.
+ */
+ switch (instr & PO_XOP_OPCODE_MASK) {
case PPC_INST_RFID:
/* XXX do we need to check for PR=0 here? */
newmsr = vcpu->arch.shregs.srr1;
@@ -105,7 +118,8 @@
vcpu->arch.shregs.msr = newmsr;
return RESUME_GUEST;
- case PPC_INST_TSR:
+ /* ignore bit 31, see comment above */
+ case (PPC_INST_TSR & PO_XOP_OPCODE_MASK):
/* check for PR=1 and arch 2.06 bit set in PCR */
if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
/* generate an illegal instruction interrupt */
@@ -140,7 +154,8 @@
vcpu->arch.shregs.msr = msr;
return RESUME_GUEST;
- case PPC_INST_TRECLAIM:
+ /* ignore bit 31, see comment above */
+ case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK):
/* check for TM disabled in the HFSCR or MSR */
if (!(vcpu->arch.hfscr & HFSCR_TM)) {
/* generate an illegal instruction interrupt */
@@ -176,7 +191,8 @@
vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
return RESUME_GUEST;
- case PPC_INST_TRECHKPT:
+ /* ignore bit 31, see comment above */
+ case (PPC_INST_TRECHKPT & PO_XOP_OPCODE_MASK):
/* XXX do we need to check for PR=0 here? */
/* check for TM disabled in the HFSCR or MSR */
if (!(vcpu->arch.hfscr & HFSCR_TM)) {
@@ -208,6 +224,8 @@
}
/* What should we do here? We didn't recognize the instruction */
- WARN_ON_ONCE(1);
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ pr_warn_ratelimited("Unrecognized TM-related instruction %#x for emulation", instr);
+
return RESUME_GUEST;
}
diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
index 2172462..fad931f 100644
--- a/arch/powerpc/kvm/book3s_hv_tm_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
@@ -23,7 +23,18 @@
u64 newmsr, msr, bescr;
int rs;
- switch (instr & 0xfc0007ff) {
+ /*
+ * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
+ * in these instructions, so masking bit 31 out doesn't change these
+ * instructions. For the tsr. instruction if bit 31 = 0 then it is per
+ * ISA an invalid form, however P9 UM, in section 4.6.10 Book II Invalid
+ * Forms, informs specifically that ignoring bit 31 is an acceptable way
+ * to handle TM-related invalid forms that have bit 31 = 0. Moreover,
+ * for emulation purposes both forms (w/ and wo/ bit 31 set) can
+ * generate a softpatch interrupt. Hence both forms are handled below
+ * for tsr. to make them behave the same way.
+ */
+ switch (instr & PO_XOP_OPCODE_MASK) {
case PPC_INST_RFID:
/* XXX do we need to check for PR=0 here? */
newmsr = vcpu->arch.shregs.srr1;
@@ -73,7 +84,8 @@
vcpu->arch.shregs.msr = newmsr;
return 1;
- case PPC_INST_TSR:
+ /* ignore bit 31, see comment above */
+ case (PPC_INST_TSR & PO_XOP_OPCODE_MASK):
/* we know the MSR has the TS field = S (0b01) here */
msr = vcpu->arch.shregs.msr;
/* check for PR=1 and arch 2.06 bit set in PCR */
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cc65af8..3f6ad3f 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1769,10 +1769,12 @@
err = kvmppc_mmu_init(vcpu);
if (err < 0)
- goto uninit_vcpu;
+ goto free_shared_page;
return vcpu;
+free_shared_page:
+ free_page((unsigned long)vcpu->arch.shared);
uninit_vcpu:
kvm_vcpu_uninit(vcpu);
free_shadow_vcpu:
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 26b2599..41137ec 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -240,6 +240,17 @@
* value so we can restore it on the way out.
*/
orig_rets = args.rets;
+ if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) {
+ /*
+ * Don't overflow our args array: ensure there is room for
+ * at least rets[0] (even if the call specifies 0 nret).
+ *
+ * Each handler must then check for the correct nargs and nret
+ * values, but they may always return failure in rets[0].
+ */
+ rc = -EINVAL;
+ goto fail;
+ }
args.rets = &args.args[be32_to_cpu(args.nargs)];
mutex_lock(&vcpu->kvm->arch.rtas_token_lock);
@@ -267,9 +278,17 @@
fail:
/*
* We only get here if the guest has called RTAS with a bogus
- * args pointer. That means we can't get to the args, and so we
- * can't fail the RTAS call. So fail right out to userspace,
- * which should kill the guest.
+ * args pointer or nargs/nret values that would overflow the
+ * array. That means we can't get to the args, and so we can't
+ * fail the RTAS call. So fail right out to userspace, which
+ * should kill the guest.
+ *
+ * SLOF should actually pass the hcall return value from the
+ * rtas handler call in r3, so enter_rtas could be modified to
+ * return a failure indication in r3 and we could return such
+ * errors to the guest rather than failing to host userspace.
+ * However old guests that don't test for failure could then
+ * continue silently after errors, so for now we won't do this.
*/
return rc;
}
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index a3f9c66..baa7408 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -2005,6 +2005,10 @@
pr_devel("Creating xive for partition\n");
+ /* Already there ? */
+ if (kvm->arch.xive)
+ return -EEXIST;
+
xive = kvmppc_xive_get_device(kvm, type);
if (!xive)
return -ENOMEM;
@@ -2014,12 +2018,6 @@
xive->kvm = kvm;
mutex_init(&xive->lock);
- /* Already there ? */
- if (kvm->arch.xive)
- ret = -EEXIST;
- else
- kvm->arch.xive = xive;
-
/* We use the default queue size set by the host */
xive->q_order = xive_native_default_eq_shift();
if (xive->q_order < PAGE_SHIFT)
@@ -2039,6 +2037,7 @@
if (ret)
return ret;
+ kvm->arch.xive = xive;
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 78b906f..d78d848 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -50,6 +50,24 @@
}
}
+static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
+ u8 prio, __be32 *qpage,
+ u32 order, bool can_escalate)
+{
+ int rc;
+ __be32 *qpage_prev = q->qpage;
+
+ rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
+ can_escalate);
+ if (rc)
+ return rc;
+
+ if (qpage_prev)
+ put_page(virt_to_page(qpage_prev));
+
+ return rc;
+}
+
void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -234,6 +252,13 @@
}
state = &sb->irq_state[src];
+
+ /* Some sanity checking */
+ if (!state->valid) {
+ pr_devel("%s: source %lx invalid !\n", __func__, irq);
+ return VM_FAULT_SIGBUS;
+ }
+
kvmppc_xive_select_irq(state, &hw_num, &xd);
arch_spin_lock(&sb->lock);
@@ -582,19 +607,14 @@
q->guest_qaddr = 0;
q->guest_qshift = 0;
- rc = xive_native_configure_queue(xc->vp_id, q, priority,
- NULL, 0, true);
+ rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
+ NULL, 0, true);
if (rc) {
pr_err("Failed to reset queue %d for VCPU %d: %d\n",
priority, xc->server_num, rc);
return rc;
}
- if (q->qpage) {
- put_page(virt_to_page(q->qpage));
- q->qpage = NULL;
- }
-
return 0;
}
@@ -624,6 +644,14 @@
srcu_idx = srcu_read_lock(&kvm->srcu);
gfn = gpa_to_gfn(kvm_eq.qaddr);
+
+ page_size = kvm_host_page_size(vcpu, gfn);
+ if (1ull << kvm_eq.qshift > page_size) {
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ pr_warn("Incompatible host page size %lx!\n", page_size);
+ return -EINVAL;
+ }
+
page = gfn_to_page(kvm, gfn);
if (is_error_page(page)) {
srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -631,13 +659,6 @@
return -EINVAL;
}
- page_size = kvm_host_page_size(kvm, gfn);
- if (1ull << kvm_eq.qshift > page_size) {
- srcu_read_unlock(&kvm->srcu, srcu_idx);
- pr_warn("Incompatible host page size %lx!\n", page_size);
- return -EINVAL;
- }
-
qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -653,8 +674,8 @@
* OPAL level because the use of END ESBs is not supported by
* Linux.
*/
- rc = xive_native_configure_queue(xc->vp_id, q, priority,
- (__be32 *) qaddr, kvm_eq.qshift, true);
+ rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
+ (__be32 *) qaddr, kvm_eq.qshift, true);
if (rc) {
pr_err("Failed to configure queue %d for VCPU %d: %d\n",
priority, xc->server_num, rc);
@@ -1081,7 +1102,6 @@
dev->private = xive;
xive->dev = dev;
xive->kvm = kvm;
- kvm->arch.xive = xive;
mutex_init(&xive->mapping_lock);
mutex_init(&xive->lock);
@@ -1102,6 +1122,7 @@
if (ret)
return ret;
+ kvm->arch.xive = xive;
return 0;
}
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 321db0f..7154bd4 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -734,7 +734,8 @@
return 0;
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
/* kvm_unmap_hva flushes everything anyways */
kvm_unmap_hva(kvm, start);
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 2e496eb..1139bc5 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -73,7 +73,6 @@
{
struct kvm_run *run = vcpu->run;
u32 inst;
- int ra, rs, rt;
enum emulation_result emulated = EMULATE_FAIL;
int advance = 1;
struct instruction_op op;
@@ -85,10 +84,6 @@
if (emulated != EMULATE_DONE)
return emulated;
- ra = get_ra(inst);
- rs = get_rs(inst);
- rt = get_rt(inst);
-
vcpu->arch.mmio_vsx_copy_nums = 0;
vcpu->arch.mmio_vsx_offset = 0;
vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3a77bb6..8dd4d2b 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1513,7 +1513,7 @@
return emulated;
}
-int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val)
+static int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val)
{
union kvmppc_one_reg reg;
int vmx_offset = 0;
@@ -1531,7 +1531,7 @@
return result;
}
-int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val)
+static int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val)
{
union kvmppc_one_reg reg;
int vmx_offset = 0;
@@ -1549,7 +1549,7 @@
return result;
}
-int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val)
+static int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val)
{
union kvmppc_one_reg reg;
int vmx_offset = 0;
@@ -1567,7 +1567,7 @@
return result;
}
-int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val)
+static int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val)
{
union kvmppc_one_reg reg;
int vmx_offset = 0;
@@ -2035,9 +2035,9 @@
{
struct kvm_enable_cap cap;
r = -EFAULT;
- vcpu_load(vcpu);
if (copy_from_user(&cap, argp, sizeof(cap)))
goto out;
+ vcpu_load(vcpu);
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
vcpu_put(vcpu);
break;
@@ -2061,9 +2061,9 @@
case KVM_DIRTY_TLB: {
struct kvm_dirty_tlb dirty;
r = -EFAULT;
- vcpu_load(vcpu);
if (copy_from_user(&dirty, argp, sizeof(dirty)))
goto out;
+ vcpu_load(vcpu);
r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
vcpu_put(vcpu);
break;