Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c5673bd..425444d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -516,6 +516,9 @@
c->intercept_dr = h->intercept_dr | g->intercept_dr;
c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
c->intercept = h->intercept | g->intercept;
+
+ c->intercept |= (1ULL << INTERCEPT_VMLOAD);
+ c->intercept |= (1ULL << INTERCEPT_VMSAVE);
}
static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
@@ -787,9 +790,6 @@
if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
return 0;
} else {
- if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
- pr_err("%s: ip 0x%lx next 0x%llx\n",
- __func__, kvm_rip_read(vcpu), svm->next_rip);
kvm_rip_write(vcpu, svm->next_rip);
}
svm_set_interrupt_shadow(vcpu, 0);
@@ -892,6 +892,11 @@
return 0;
}
+ if (sev_active()) {
+ pr_info("KVM is unsupported when running as an SEV guest\n");
+ return 0;
+ }
+
return 1;
}
@@ -998,33 +1003,32 @@
static int svm_cpu_init(int cpu)
{
struct svm_cpu_data *sd;
- int r;
sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
if (!sd)
return -ENOMEM;
sd->cpu = cpu;
- r = -ENOMEM;
sd->save_area = alloc_page(GFP_KERNEL);
if (!sd->save_area)
- goto err_1;
+ goto free_cpu_data;
if (svm_sev_enabled()) {
- r = -ENOMEM;
sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
sizeof(void *),
GFP_KERNEL);
if (!sd->sev_vmcbs)
- goto err_1;
+ goto free_save_area;
}
per_cpu(svm_data, cpu) = sd;
return 0;
-err_1:
+free_save_area:
+ __free_page(sd->save_area);
+free_cpu_data:
kfree(sd);
- return r;
+ return -ENOMEM;
}
@@ -1298,6 +1302,47 @@
}
}
+/*
+ * The default MMIO mask is a single bit (excluding the present bit),
+ * which could conflict with the memory encryption bit. Check for
+ * memory encryption support and override the default MMIO mask if
+ * memory encryption is enabled.
+ */
+static __init void svm_adjust_mmio_mask(void)
+{
+ unsigned int enc_bit, mask_bit;
+ u64 msr, mask;
+
+ /* If there is no memory encryption support, use existing mask */
+ if (cpuid_eax(0x80000000) < 0x8000001f)
+ return;
+
+ /* If memory encryption is not enabled, use existing mask */
+ rdmsrl(MSR_K8_SYSCFG, msr);
+ if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ return;
+
+ enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
+ mask_bit = boot_cpu_data.x86_phys_bits;
+
+ /* Increment the mask bit if it is the same as the encryption bit */
+ if (enc_bit == mask_bit)
+ mask_bit++;
+
+ /*
+ * If the mask bit location is below 52, then some bits above the
+ * physical addressing limit will always be reserved, so use the
+ * rsvd_bits() function to generate the mask. This mask, along with
+ * the present bit, will be used to generate a page fault with
+ * PFER.RSV = 1.
+ *
+ * If the mask bit location is 52 (or above), then clear the mask.
+ */
+ mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
+
+ kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -1352,6 +1397,8 @@
}
}
+ svm_adjust_mmio_mask();
+
for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
@@ -1399,12 +1446,7 @@
}
}
- if (vgif) {
- if (!boot_cpu_has(X86_FEATURE_VGIF))
- vgif = false;
- else
- pr_info("Virtual GIF supported\n");
- }
+ vgif = false; /* Disabled for CVE-2021-3653 */
return 0;
@@ -1739,7 +1781,7 @@
for_each_possible_cpu(cpu) {
sd = per_cpu(svm_data, cpu);
- sd->sev_vmcbs[pos] = NULL;
+ sd->sev_vmcbs[asid] = NULL;
}
}
@@ -1750,9 +1792,25 @@
__sev_asid_free(sev->asid);
}
-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+static void sev_decommission(unsigned int handle)
{
struct sev_data_decommission *decommission;
+
+ if (!handle)
+ return;
+
+ decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
+ if (!decommission)
+ return;
+
+ decommission->handle = handle;
+ sev_guest_decommission(decommission, NULL);
+
+ kfree(decommission);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
struct sev_data_deactivate *data;
if (!handle)
@@ -1770,15 +1828,7 @@
sev_guest_df_flush(NULL);
kfree(data);
- decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
- if (!decommission)
- return;
-
- /* decommission handle */
- decommission->handle = handle;
- sev_guest_decommission(decommission, NULL);
-
- kfree(decommission);
+ sev_decommission(handle);
}
static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
@@ -1791,6 +1841,8 @@
struct page **pages;
unsigned long first, last;
+ lockdep_assert_held(&kvm->lock);
+
if (ulen == 0 || uaddr + ulen < uaddr)
return NULL;
@@ -1818,7 +1870,7 @@
return NULL;
/* Pin the user virtual address. */
- npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages);
+ npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
if (npinned != npages) {
pr_err("SEV: Failure locking %lu pages.\n", npages);
goto err;
@@ -1883,6 +1935,10 @@
struct kvm_svm *kvm_svm = __vmalloc(sizeof(struct kvm_svm),
GFP_KERNEL_ACCOUNT | __GFP_ZERO,
PAGE_KERNEL);
+
+ if (!kvm_svm)
+ return NULL;
+
return &kvm_svm->kvm;
}
@@ -1910,6 +1966,7 @@
list_for_each_safe(pos, q, head) {
__unregister_enc_region_locked(kvm,
list_entry(pos, struct enc_region, list));
+ cond_resched();
}
}
@@ -3191,8 +3248,8 @@
return NESTED_EXIT_HOST;
break;
case SVM_EXIT_EXCP_BASE + PF_VECTOR:
- /* When we're shadowing, trap PFs, but not async PF */
- if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
+ /* Trap async PF even if not shadowing */
+ if (!npt_enabled || svm->vcpu.arch.apf.host_apf_reason)
return NESTED_EXIT_HOST;
break;
default:
@@ -3281,7 +3338,7 @@
dst->iopm_base_pa = from->iopm_base_pa;
dst->msrpm_base_pa = from->msrpm_base_pa;
dst->tsc_offset = from->tsc_offset;
- dst->asid = from->asid;
+ /* asid not copied, it is handled manually for svm->vmcb. */
dst->tlb_ctl = from->tlb_ctl;
dst->int_ctl = from->int_ctl;
dst->int_vector = from->int_vector;
@@ -3548,7 +3605,13 @@
svm->nested.intercept = nested_vmcb->control.intercept;
svm_flush_tlb(&svm->vcpu, true);
- svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
+
+ svm->vmcb->control.int_ctl &=
+ V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
+
+ svm->vmcb->control.int_ctl |= nested_vmcb->control.int_ctl &
+ (V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK);
+
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
else
@@ -3924,6 +3987,12 @@
return 1;
}
+static int invd_interception(struct vcpu_svm *svm)
+{
+ /* Treat an INVD instruction as a NOP and just skip it. */
+ return kvm_skip_emulated_instruction(&svm->vcpu);
+}
+
static int invlpg_interception(struct vcpu_svm *svm)
{
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
@@ -4001,7 +4070,7 @@
err = 0;
if (cr >= 16) { /* mov to cr */
cr -= 16;
- val = kvm_register_read(&svm->vcpu, reg);
+ val = kvm_register_readl(&svm->vcpu, reg);
switch (cr) {
case 0:
if (!check_selective_cr0_intercepted(svm, val))
@@ -4046,7 +4115,7 @@
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
- kvm_register_write(&svm->vcpu, reg, val);
+ kvm_register_writel(&svm->vcpu, reg, val);
}
return kvm_complete_insn_gp(&svm->vcpu, err);
}
@@ -4076,13 +4145,13 @@
if (dr >= 16) { /* mov to DRn */
if (!kvm_require_dr(&svm->vcpu, dr - 16))
return 1;
- val = kvm_register_read(&svm->vcpu, reg);
+ val = kvm_register_readl(&svm->vcpu, reg);
kvm_set_dr(&svm->vcpu, dr - 16, val);
} else {
if (!kvm_require_dr(&svm->vcpu, dr))
return 1;
kvm_get_dr(&svm->vcpu, dr, &val);
- kvm_register_write(&svm->vcpu, reg, val);
+ kvm_register_writel(&svm->vcpu, reg, val);
}
return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -4184,8 +4253,7 @@
break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+ !guest_has_spec_ctrl_msr(vcpu))
return 1;
msr_info->data = svm->spec_ctrl;
@@ -4269,16 +4337,13 @@
break;
case MSR_IA32_SPEC_CTRL:
if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+ !guest_has_spec_ctrl_msr(vcpu))
return 1;
- /* The STIBP bit doesn't fault even if it's not advertised */
- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
+ if (kvm_spec_ctrl_test_value(data))
return 1;
svm->spec_ctrl = data;
-
if (!data)
break;
@@ -4297,18 +4362,17 @@
break;
case MSR_IA32_PRED_CMD:
if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
+ !guest_has_pred_cmd_msr(vcpu))
return 1;
if (data & ~PRED_CMD_IBPB)
return 1;
-
+ if (!boot_cpu_has(X86_FEATURE_IBPB))
+ return 1;
if (!data)
break;
wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
- if (is_guest_mode(vcpu))
- break;
set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
break;
case MSR_AMD64_VIRT_SPEC_CTRL:
@@ -4776,7 +4840,7 @@
[SVM_EXIT_RDPMC] = rdpmc_interception,
[SVM_EXIT_CPUID] = cpuid_interception,
[SVM_EXIT_IRET] = iret_interception,
- [SVM_EXIT_INVD] = emulate_on_interception,
+ [SVM_EXIT_INVD] = invd_interception,
[SVM_EXIT_PAUSE] = pause_interception,
[SVM_EXIT_HLT] = halt_interception,
[SVM_EXIT_INVLPG] = invlpg_interception,
@@ -5141,8 +5205,11 @@
return;
}
-static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
+static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
{
+ if (!vcpu->arch.apicv_active)
+ return -1;
+
kvm_lapic_set_irr(vec, vcpu->arch.apic);
smp_mb__after_atomic();
@@ -5154,6 +5221,8 @@
put_cpu();
} else
kvm_vcpu_wake_up(vcpu);
+
+ return 0;
}
static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
@@ -5329,6 +5398,7 @@
* - Tell IOMMU to use legacy mode for this interrupt.
* - Retrieve ga_tag of prior interrupt remapping data.
*/
+ pi.prev_ga_tag = 0;
pi.is_guest_mode = false;
ret = irq_set_vcpu_affinity(host_irq, &pi);
@@ -5986,6 +6056,11 @@
return true;
}
+static bool svm_pku_supported(void)
+{
+ return false;
+}
+
#define PRE_EX(exit) { .exit_code = (exit), \
.stage = X86_ICPT_PRE_EXCEPT, }
#define POST_EX(exit) { .exit_code = (exit), \
@@ -6413,8 +6488,10 @@
/* Bind ASID to this guest */
ret = sev_bind_asid(kvm, start->handle, error);
- if (ret)
+ if (ret) {
+ sev_decommission(start->handle);
goto e_free_session;
+ }
/* return handle to userspace */
params.handle = start->handle;
@@ -7031,12 +7108,20 @@
if (!region)
return -ENOMEM;
+ mutex_lock(&kvm->lock);
region->pages = sev_pin_memory(kvm, range->addr, range->size, ®ion->npages, 1);
if (!region->pages) {
ret = -ENOMEM;
+ mutex_unlock(&kvm->lock);
goto e_free;
}
+ region->uaddr = range->addr;
+ region->size = range->size;
+
+ list_add_tail(®ion->list, &sev->regions_list);
+ mutex_unlock(&kvm->lock);
+
/*
* The guest may change the memory encryption attribute from C=0 -> C=1
* or vice versa for this memory range. Lets make sure caches are
@@ -7045,13 +7130,6 @@
*/
sev_clflush_pages(region->pages, region->npages);
- region->uaddr = range->addr;
- region->size = range->size;
-
- mutex_lock(&kvm->lock);
- list_add_tail(®ion->list, &sev->regions_list);
- mutex_unlock(&kvm->lock);
-
return ret;
e_free:
@@ -7278,6 +7356,7 @@
.xsaves_supported = svm_xsaves_supported,
.umip_emulated = svm_umip_emulated,
.pt_supported = svm_pt_supported,
+ .pku_supported = svm_pku_supported,
.set_supported_cpuid = svm_set_supported_cpuid,