Handle spurious page faults due to break-before-make.
Also adding test that triggers spurious faults and fails without this
fix, but passes with it.
Change-Id: I30c591d87c5278a8bb4ed4ec992544f751204d90
diff --git a/src/arch/aarch64/handler.c b/src/arch/aarch64/handler.c
index df7ade3..1ceb04d 100644
--- a/src/arch/aarch64/handler.c
+++ b/src/arch/aarch64/handler.c
@@ -422,10 +422,44 @@
return api_abort(current());
}
+/**
+ * Initialises a fault info structure. It assumes that an FnV bit exists at
+ * bit offset 10 of the ESR, and that it is only valid when the bottom 6 bits of
+ * the ESR (the fault status code) are 010000; this is the case for both
+ * instruction and data aborts, but not necessarily for other exception reasons.
+ */
+static struct vcpu_fault_info fault_info_init(uintreg_t esr,
+ const struct vcpu *vcpu, int mode,
+ uint8_t size)
+{
+ uint32_t fsc = esr & 0x3f;
+ struct vcpu_fault_info r;
+
+ r.mode = mode;
+ r.size = size;
+ r.pc = va_init(vcpu->regs.pc);
+
+ /*
+ * Check the FnV bit, which is only valid if dfsc/ifsc is 010000. It
+ * indicates that we cannot rely on far_el2.
+ */
+ if (fsc == 0x10 && esr & (1u << 10)) {
+ r.vaddr = va_init(0);
+ r.ipaddr = ipa_init(read_msr(hpfar_el2) << 8);
+ } else {
+ r.vaddr = va_init(read_msr(far_el2));
+ r.ipaddr = ipa_init((read_msr(hpfar_el2) << 8) |
+ (read_msr(far_el2) & (PAGE_SIZE - 1)));
+ }
+
+ return r;
+}
+
struct vcpu *sync_lower_exception(uintreg_t esr)
{
struct vcpu *vcpu = current();
int32_t ret;
+ struct vcpu_fault_info info;
switch (esr >> 26) {
case 0x01: /* EC = 000001, WFI or WFE. */
@@ -438,34 +472,30 @@
return api_wait_for_interrupt(vcpu);
case 0x24: /* EC = 100100, Data abort. */
- dlog("Lower data abort: pc=0x%x, esr=0x%x, ec=0x%x, vmid=%u, "
- "vcpu=%u",
- vcpu->regs.pc, esr, esr >> 26, vcpu->vm->id,
- vcpu_index(vcpu));
- if (!(esr & (1u << 10))) { /* Check FnV bit. */
- dlog(", far=0x%x, hpfar=0x%x", read_msr(far_el2),
- read_msr(hpfar_el2) << 8);
- } else {
- dlog(", far=invalid");
- }
+ /*
+ * Determine the size based on the SAS bits, which are only
+ * valid if the ISV bit is set. The WnR bit is used to decide
+ * if it's a read or write.
+ */
+ info = fault_info_init(
+ esr, vcpu, (esr & (1u << 6)) ? MM_MODE_W : MM_MODE_R,
+ (esr & (1u << 24)) ? (1u << ((esr >> 22) & 0x3)) : 0);
- dlog("\n");
+ /* Call the platform-independent handler. */
+ if (vcpu_handle_page_fault(vcpu, &info)) {
+ return NULL;
+ }
break;
case 0x20: /* EC = 100000, Instruction abort. */
- dlog("Lower instruction abort: pc=0x%x, esr=0x%x, ec=0x%x, "
- "vmdid=%u, vcpu=%u",
- vcpu->regs.pc, esr, esr >> 26, vcpu->vm->id,
- vcpu_index(vcpu));
- if (!(esr & (1u << 10))) { /* Check FnV bit. */
- dlog(", far=0x%x, hpfar=0x%x", read_msr(far_el2),
- read_msr(hpfar_el2) << 8);
- } else {
- dlog(", far=invalid");
- }
+ /* Determine the size based on the IL bit. */
+ info = fault_info_init(esr, vcpu, MM_MODE_X,
+ (esr & (1u << 25)) ? 4 : 2);
- dlog(", vttbr_el2=0x%x", read_msr(vttbr_el2));
- dlog("\n");
+ /* Call the platform-independent handler. */
+ if (vcpu_handle_page_fault(vcpu, &info)) {
+ return NULL;
+ }
break;
case 0x17: /* EC = 010111, SMC instruction. */
diff --git a/src/cpu.c b/src/cpu.c
index 03e0074..5db709c 100644
--- a/src/cpu.c
+++ b/src/cpu.c
@@ -160,3 +160,74 @@
{
return vcpu - vcpu->vm->vcpus;
}
+
+/**
+ * Handles a page fault. It does so by determining if it's a legitimate or
+ * spurious fault, and recovering from the latter.
+ *
+ * Returns true if the caller should resume the current vcpu, or false if its VM
+ * should be aborted.
+ */
+bool vcpu_handle_page_fault(const struct vcpu *current,
+ struct vcpu_fault_info *f)
+{
+ struct vm *vm = current->vm;
+ ipaddr_t second_addr;
+ bool second;
+ int mode;
+ int mask = f->mode | MM_MODE_INVALID;
+ bool ret = false;
+
+ /* We can't recover if we don't know the size. */
+ if (f->size == 0) {
+ goto exit;
+ }
+
+ sl_lock(&vm->lock);
+
+ /*
+ * Check if this is a legitimate fault, i.e., if the page table doesn't
+ * allow the access attemped by the VM.
+ */
+ if (!mm_vm_get_mode(&vm->ptable, f->ipaddr, ipa_add(f->ipaddr, 1),
+ &mode) ||
+ (mode & mask) != f->mode) {
+ goto exit_unlock;
+ }
+
+ /*
+ * Do the same mode check on the second page, if the fault straddles two
+ * pages.
+ */
+ second_addr = ipa_add(f->ipaddr, f->size - 1);
+ second = (ipa_addr(f->ipaddr) >> PAGE_BITS) !=
+ (ipa_addr(second_addr) >> PAGE_BITS);
+ if (second) {
+ if (!mm_vm_get_mode(&vm->ptable, second_addr,
+ ipa_add(second_addr, 1), &mode) ||
+ (mode & mask) != f->mode) {
+ goto exit_unlock;
+ }
+ }
+
+ /*
+ * This is a spurious fault, likely because another CPU is updating the
+ * page table. It is responsible for issuing global tlb invalidations
+ * while holding the VM lock, so we don't need to do anything else to
+ * recover from it. (Acquiring/releasing the lock ensured that the
+ * invalidations have completed.)
+ */
+
+ ret = true;
+
+exit_unlock:
+ sl_unlock(&vm->lock);
+exit:
+ if (!ret) {
+ dlog("Stage-2 page fault: pc=0x%x, vmid=%u, vcpu=%u, "
+ "vaddr=0x%x, ipaddr=0x%x, mode=0x%x, size=%u\n",
+ f->pc, vm->id, vcpu_index(current), f->vaddr, f->ipaddr,
+ f->mode, f->size);
+ }
+ return ret;
+}