Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2ce9da5..d3877dd 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -343,6 +343,8 @@
{
BUG_ON((u64)(unsigned)access_mask != access_mask);
BUG_ON((mmio_mask & mmio_value) != mmio_value);
+ WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len));
+ WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);
shadow_mmio_value = mmio_value | SPTE_MMIO_MASK;
shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
shadow_mmio_access_mask = access_mask;
@@ -405,11 +407,11 @@
}
/*
- * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
+ * Due to limited space in PTEs, the MMIO generation is an 18 bit subset of
* the memslots generation and is derived as follows:
*
* Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
- * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
+ * Bits 9-17 of the MMIO generation are propagated to spte bits 54-62
*
* The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
* the MMIO generation number, as doing so would require stealing a bit from
@@ -418,25 +420,38 @@
* requires a full MMU zap). The flag is instead explicitly queried when
* checking for MMIO spte cache hits.
*/
-#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0)
#define MMIO_SPTE_GEN_LOW_START 3
#define MMIO_SPTE_GEN_LOW_END 11
+
+#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT
+#define MMIO_SPTE_GEN_HIGH_END 62
+
#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
MMIO_SPTE_GEN_LOW_START)
-
-#define MMIO_SPTE_GEN_HIGH_START 52
-#define MMIO_SPTE_GEN_HIGH_END 61
#define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
MMIO_SPTE_GEN_HIGH_START)
+
+#define MMIO_SPTE_GEN_LOW_BITS (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1)
+#define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)
+
+/* remember to adjust the comment above as well if you change these */
+static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 9);
+
+#define MMIO_SPTE_GEN_LOW_SHIFT (MMIO_SPTE_GEN_LOW_START - 0)
+#define MMIO_SPTE_GEN_HIGH_SHIFT (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS)
+
+#define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0)
+
static u64 generation_mmio_spte_mask(u64 gen)
{
u64 mask;
WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
+ BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);
- mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
- mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
+ mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
+ mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
return mask;
}
@@ -444,10 +459,8 @@
{
u64 gen;
- spte &= ~shadow_mmio_mask;
-
- gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
- gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
+ gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_SHIFT;
+ gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_SHIFT;
return gen;
}
@@ -538,16 +551,20 @@
static u8 kvm_get_shadow_phys_bits(void)
{
/*
- * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected
- * in CPU detection code, but MKTME treats those reduced bits as
- * 'keyID' thus they are not reserved bits. Therefore for MKTME
- * we should still return physical address bits reported by CPUID.
+ * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME is detected
+ * in CPU detection code, but the processor treats those reduced bits as
+ * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
+ * the physical address bits reported by CPUID.
*/
- if (!boot_cpu_has(X86_FEATURE_TME) ||
- WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008))
- return boot_cpu_data.x86_phys_bits;
+ if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
+ return cpuid_eax(0x80000008) & 0xff;
- return cpuid_eax(0x80000008) & 0xff;
+ /*
+ * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
+ * custom CPUID. Proceed with whatever the kernel found since these features
+ * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
+ */
+ return boot_cpu_data.x86_phys_bits;
}
static void kvm_mmu_reset_all_pte_masks(void)
@@ -576,16 +593,15 @@
* the most significant bits of legal physical address space.
*/
shadow_nonpresent_or_rsvd_mask = 0;
- low_phys_bits = boot_cpu_data.x86_cache_bits;
- if (boot_cpu_data.x86_cache_bits <
- 52 - shadow_nonpresent_or_rsvd_mask_len) {
+ low_phys_bits = boot_cpu_data.x86_phys_bits;
+ if (boot_cpu_has_bug(X86_BUG_L1TF) &&
+ !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >=
+ 52 - shadow_nonpresent_or_rsvd_mask_len)) {
+ low_phys_bits = boot_cpu_data.x86_cache_bits
+ - shadow_nonpresent_or_rsvd_mask_len;
shadow_nonpresent_or_rsvd_mask =
- rsvd_bits(boot_cpu_data.x86_cache_bits -
- shadow_nonpresent_or_rsvd_mask_len,
- boot_cpu_data.x86_cache_bits - 1);
- low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len;
- } else
- WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF));
+ rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1);
+ }
shadow_nonpresent_or_rsvd_lower_gfn_mask =
GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
@@ -1282,12 +1298,12 @@
return __mmu_gfn_lpage_is_disallowed(gfn, level, slot);
}
-static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
+static int host_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn)
{
unsigned long page_size;
int i, ret = 0;
- page_size = kvm_host_page_size(kvm, gfn);
+ page_size = kvm_host_page_size(vcpu, gfn);
for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
if (page_size >= KVM_HPAGE_SIZE(i))
@@ -1337,7 +1353,7 @@
if (unlikely(*force_pt_level))
return PT_PAGE_TABLE_LEVEL;
- host_level = host_mapping_level(vcpu->kvm, large_gfn);
+ host_level = host_mapping_level(vcpu, large_gfn);
if (host_level == PT_PAGE_TABLE_LEVEL)
return host_level;
@@ -1814,10 +1830,10 @@
* Emulate arch specific page modification logging for the
* nested hypervisor
*/
-int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu)
+int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa)
{
if (kvm_x86_ops->write_log_dirty)
- return kvm_x86_ops->write_log_dirty(vcpu);
+ return kvm_x86_ops->write_log_dirty(vcpu, l2_gpa);
return 0;
}
@@ -2040,7 +2056,8 @@
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
}
@@ -2126,7 +2143,7 @@
* aggregate version in order to make the slab shrinker
* faster
*/
-static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
{
kvm->arch.n_used_mmu_pages += nr;
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
@@ -2226,13 +2243,6 @@
{
}
-static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp, u64 *spte,
- const void *pte)
-{
- WARN_ON(1);
-}
-
#define KVM_PAGE_ARRAY_NR 16
struct kvm_mmu_pages {
@@ -3528,7 +3538,7 @@
* - true: let the vcpu to access on the same address again.
* - false: let the real page fault path to fix it.
*/
-static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
+static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int level,
u32 error_code)
{
struct kvm_shadow_walk_iterator iterator;
@@ -3548,7 +3558,7 @@
do {
u64 new_spte;
- for_each_shadow_entry_lockless(vcpu, gva, iterator, spte)
+ for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte)
if (!is_shadow_present_pte(spte) ||
iterator.level < level)
break;
@@ -3626,7 +3636,7 @@
} while (true);
- trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
+ trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep,
spte, fault_handled);
walk_shadow_page_lockless_end(vcpu);
@@ -3634,10 +3644,11 @@
}
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
- gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable);
+ gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
+ bool *writable);
static int make_mmu_pages_available(struct kvm_vcpu *vcpu);
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
+static int nonpaging_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
gfn_t gfn, bool prefault)
{
int r;
@@ -3663,16 +3674,16 @@
gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
}
- if (fast_page_fault(vcpu, v, level, error_code))
+ if (fast_page_fault(vcpu, gpa, level, error_code))
return RET_PF_RETRY;
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
+ if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
return RET_PF_RETRY;
- if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
+ if (handle_abnormal_pfn(vcpu, gpa, gfn, pfn, ACC_ALL, &r))
return r;
r = RET_PF_RETRY;
@@ -3683,7 +3694,7 @@
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
- r = __direct_map(vcpu, v, write, map_writable, level, pfn,
+ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
prefault, false);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
@@ -3981,7 +3992,7 @@
}
EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
+static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr,
u32 access, struct x86_exception *exception)
{
if (exception)
@@ -3989,7 +4000,7 @@
return vaddr;
}
-static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
+static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr,
u32 access,
struct x86_exception *exception)
{
@@ -4149,13 +4160,14 @@
walk_shadow_page_lockless_end(vcpu);
}
-static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
+static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa,
u32 error_code, bool prefault)
{
- gfn_t gfn = gva >> PAGE_SHIFT;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
int r;
- pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
+ /* Note, paging is disabled, ergo gva == gpa. */
+ pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code);
if (page_fault_handle_page_track(vcpu, error_code, gfn))
return RET_PF_EMULATE;
@@ -4167,11 +4179,12 @@
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
- return nonpaging_map(vcpu, gva & PAGE_MASK,
+ return nonpaging_map(vcpu, gpa & PAGE_MASK,
error_code, gfn, prefault);
}
-static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ gfn_t gfn)
{
struct kvm_arch_async_pf arch;
@@ -4180,11 +4193,13 @@
arch.direct_map = vcpu->arch.mmu->direct_map;
arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu);
- return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
+ return kvm_setup_async_pf(vcpu, cr2_or_gpa,
+ kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
- gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable)
+ gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
+ bool *writable)
{
struct kvm_memory_slot *slot;
bool async;
@@ -4204,12 +4219,12 @@
return false; /* *pfn has correct page already */
if (!prefault && kvm_can_do_async_pf(vcpu)) {
- trace_kvm_try_async_get_page(gva, gfn);
+ trace_kvm_try_async_get_page(cr2_or_gpa, gfn);
if (kvm_find_async_pf_gfn(vcpu, gfn)) {
- trace_kvm_async_pf_doublefault(gva, gfn);
+ trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn);
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
return true;
- } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
+ } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn))
return true;
}
@@ -4222,6 +4237,12 @@
{
int r = 1;
+#ifndef CONFIG_X86_64
+ /* A 64-bit CR2 should be impossible on 32-bit KVM. */
+ if (WARN_ON_ONCE(fault_address >> 32))
+ return -EFAULT;
+#endif
+
vcpu->arch.l1tf_flush_l1d = true;
switch (vcpu->arch.apf.host_apf_reason) {
default:
@@ -4259,7 +4280,7 @@
return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num);
}
-static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
+static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
bool prefault)
{
kvm_pfn_t pfn;
@@ -4328,7 +4349,6 @@
context->gva_to_gpa = nonpaging_gva_to_gpa;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
- context->update_pte = nonpaging_update_pte;
context->root_level = 0;
context->shadow_root_level = PT32E_ROOT_LEVEL;
context->direct_map = true;
@@ -4564,7 +4584,7 @@
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
- nonleaf_bit8_rsvd | gbpages_bit_rsvd |
+ gbpages_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
@@ -4646,7 +4666,15 @@
void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
{
- bool uses_nx = context->nx ||
+ /*
+ * KVM uses NX when TDP is disabled to handle a variety of scenarios,
+ * notably for huge SPTEs if iTLB multi-hit mitigation is enabled and
+ * to generate correct permissions for CR0.WP=0/CR4.SMEP=1/EFER.NX=0.
+ * The iTLB multi-hit workaround can be toggled at any time, so assume
+ * NX can be used by any non-nested shadow MMU to avoid having to reset
+ * MMU contexts. Note, KVM forces EFER.NX=1 when TDP is disabled.
+ */
+ bool uses_nx = context->nx || !tdp_enabled ||
context->mmu_role.base.smep_andnot_wp;
struct rsvd_bits_validate *shadow_zero_check;
int i;
@@ -4907,7 +4935,6 @@
context->gva_to_gpa = paging64_gva_to_gpa;
context->sync_page = paging64_sync_page;
context->invlpg = paging64_invlpg;
- context->update_pte = paging64_update_pte;
context->shadow_root_level = level;
context->direct_map = false;
}
@@ -4936,7 +4963,6 @@
context->gva_to_gpa = paging32_gva_to_gpa;
context->sync_page = paging32_sync_page;
context->invlpg = paging32_invlpg;
- context->update_pte = paging32_update_pte;
context->shadow_root_level = PT32E_ROOT_LEVEL;
context->direct_map = false;
}
@@ -5011,7 +5037,6 @@
context->page_fault = tdp_page_fault;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
- context->update_pte = nonpaging_update_pte;
context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
context->direct_map = true;
context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
@@ -5144,7 +5169,6 @@
context->gva_to_gpa = ept_gva_to_gpa;
context->sync_page = ept_sync_page;
context->invlpg = ept_invlpg;
- context->update_pte = ept_update_pte;
context->root_level = PT64_ROOT_4LEVEL;
context->direct_map = false;
context->mmu_role.as_u64 = new_role.as_u64;
@@ -5284,19 +5308,6 @@
}
EXPORT_SYMBOL_GPL(kvm_mmu_unload);
-static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp, u64 *spte,
- const void *new)
-{
- if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
- ++vcpu->kvm->stat.mmu_pde_zapped;
- return;
- }
-
- ++vcpu->kvm->stat.mmu_pte_updated;
- vcpu->arch.mmu->update_pte(vcpu, sp, spte, new);
-}
-
static bool need_remote_flush(u64 old, u64 new)
{
if (!is_shadow_present_pte(old))
@@ -5462,14 +5473,10 @@
local_flush = true;
while (npte--) {
- u32 base_role = vcpu->arch.mmu->mmu_role.base.word;
-
entry = *spte;
mmu_page_zap_pte(vcpu->kvm, sp, spte);
- if (gentry &&
- !((sp->role.word ^ base_role)
- & mmu_base_role_mask.word) && rmap_can_add(vcpu))
- mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
+ if (gentry && sp->role.level != PG_LEVEL_4K)
+ ++vcpu->kvm->stat.mmu_pde_zapped;
if (need_remote_flush(entry, *spte))
remote_flush = true;
++spte;
@@ -5516,7 +5523,7 @@
return 0;
}
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len)
{
int r, emulation_type = 0;
@@ -5525,18 +5532,18 @@
/* With shadow page tables, fault_address contains a GVA or nGPA. */
if (vcpu->arch.mmu->direct_map) {
vcpu->arch.gpa_available = true;
- vcpu->arch.gpa_val = cr2;
+ vcpu->arch.gpa_val = cr2_or_gpa;
}
r = RET_PF_INVALID;
if (unlikely(error_code & PFERR_RSVD_MASK)) {
- r = handle_mmio_page_fault(vcpu, cr2, direct);
+ r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct);
if (r == RET_PF_EMULATE)
goto emulate;
}
if (r == RET_PF_INVALID) {
- r = vcpu->arch.mmu->page_fault(vcpu, cr2,
+ r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa,
lower_32_bits(error_code),
false);
WARN_ON(r == RET_PF_INVALID);
@@ -5556,7 +5563,7 @@
*/
if (vcpu->arch.mmu->direct_map &&
(error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
- kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
+ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
return 1;
}
@@ -5571,7 +5578,7 @@
* explicitly shadowing L1's page tables, i.e. unprotecting something
* for L1 isn't going to magically fix whatever issue cause L2 to fail.
*/
- if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
+ if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
emulation_type = EMULTYPE_ALLOW_RETRY;
emulate:
/*
@@ -5586,7 +5593,7 @@
return 1;
}
- return x86_emulate_instruction(vcpu, cr2, emulation_type, insn,
+ return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
insn_len);
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
@@ -6232,25 +6239,16 @@
u64 mask;
/*
- * Set the reserved bits and the present bit of an paging-structure
- * entry to generate page fault with PFER.RSV = 1.
+ * Set a reserved PA bit in MMIO SPTEs to generate page faults with
+ * PFEC.RSVD=1 on MMIO accesses. 64-bit PTEs (PAE, x86-64, and EPT
+ * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports
+ * 52-bit physical addresses then there are no reserved PA bits in the
+ * PTEs and so the reserved PA approach must be disabled.
*/
-
- /*
- * Mask the uppermost physical address bit, which would be reserved as
- * long as the supported physical address width is less than 52.
- */
- mask = 1ull << 51;
-
- /* Set the present bit. */
- mask |= 1ull;
-
- /*
- * If reserved bit is not supported, clear the present bit to disable
- * mmio page fault.
- */
- if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
- mask &= ~1ull;
+ if (shadow_phys_bits < 52)
+ mask = BIT_ULL(51) | PT_PRESENT_MASK;
+ else
+ mask = 0;
kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
}
@@ -6445,6 +6443,7 @@
cond_resched_lock(&kvm->mmu_lock);
}
}
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, rcu_idx);