Handle spurious page faults due to break-before-make.

Also add a test that triggers spurious faults; it fails without this
fix and passes with it.

Change-Id: I30c591d87c5278a8bb4ed4ec992544f751204d90
diff --git a/src/arch/aarch64/handler.c b/src/arch/aarch64/handler.c
index df7ade3..1ceb04d 100644
--- a/src/arch/aarch64/handler.c
+++ b/src/arch/aarch64/handler.c
@@ -422,10 +422,44 @@
 	return api_abort(current());
 }
 
+/**
+ * Builds the fault info for an instruction or data abort from `esr`, `mode`,
+ * `size` and the pc in the vcpu's registers. It assumes the FnV bit is ESR bit
+ * 10 and is only valid when the fault status code (bottom 6 ESR bits) is
+ * 010000; this holds for both abort kinds, but not necessarily other reasons.
+ */
+static struct vcpu_fault_info fault_info_init(uintreg_t esr,
+					      const struct vcpu *vcpu, int mode,
+					      uint8_t size)
+{
+	uint32_t fsc = esr & 0x3f;
+	struct vcpu_fault_info r;
+
+	r.mode = mode;
+	r.size = size;
+	r.pc = va_init(vcpu->regs.pc);
+
+	/*
+	 * FnV is only valid if dfsc/ifsc is 010000. When it is set, far_el2
+	 * cannot be relied on: vaddr is set to 0 and ipaddr has no page offset.
+	 */
+	if (fsc == 0x10 && esr & (1u << 10)) {
+		r.vaddr = va_init(0);
+		r.ipaddr = ipa_init(read_msr(hpfar_el2) << 8);
+	} else {
+		r.vaddr = va_init(read_msr(far_el2));
+		r.ipaddr = ipa_init((read_msr(hpfar_el2) << 8) |
+				    (read_msr(far_el2) & (PAGE_SIZE - 1)));
+	}
+
+	return r;
+}
+
 struct vcpu *sync_lower_exception(uintreg_t esr)
 {
 	struct vcpu *vcpu = current();
 	int32_t ret;
+	struct vcpu_fault_info info;
 
 	switch (esr >> 26) {
 	case 0x01: /* EC = 000001, WFI or WFE. */
@@ -438,34 +472,30 @@
 		return api_wait_for_interrupt(vcpu);
 
 	case 0x24: /* EC = 100100, Data abort. */
-		dlog("Lower data abort: pc=0x%x, esr=0x%x, ec=0x%x, vmid=%u, "
-		     "vcpu=%u",
-		     vcpu->regs.pc, esr, esr >> 26, vcpu->vm->id,
-		     vcpu_index(vcpu));
-		if (!(esr & (1u << 10))) { /* Check FnV bit. */
-			dlog(", far=0x%x, hpfar=0x%x", read_msr(far_el2),
-			     read_msr(hpfar_el2) << 8);
-		} else {
-			dlog(", far=invalid");
-		}
+		/*
+		 * Determine the size based on the SAS bits, which are only
+		 * valid if the ISV bit is set. The WnR bit is used to decide
+		 * if it's a read or write.
+		 */
+		info = fault_info_init(
+			esr, vcpu, (esr & (1u << 6)) ? MM_MODE_W : MM_MODE_R,
+			(esr & (1u << 24)) ? (1u << ((esr >> 22) & 0x3)) : 0);
 
-		dlog("\n");
+		/* Call the platform-independent handler. */
+		if (vcpu_handle_page_fault(vcpu, &info)) {
+			return NULL;
+		}
 		break;
 
 	case 0x20: /* EC = 100000, Instruction abort. */
-		dlog("Lower instruction abort: pc=0x%x, esr=0x%x, ec=0x%x, "
-		     "vmdid=%u, vcpu=%u",
-		     vcpu->regs.pc, esr, esr >> 26, vcpu->vm->id,
-		     vcpu_index(vcpu));
-		if (!(esr & (1u << 10))) { /* Check FnV bit. */
-			dlog(", far=0x%x, hpfar=0x%x", read_msr(far_el2),
-			     read_msr(hpfar_el2) << 8);
-		} else {
-			dlog(", far=invalid");
-		}
+		/* Determine the size based on the IL bit. */
+		info = fault_info_init(esr, vcpu, MM_MODE_X,
+				       (esr & (1u << 25)) ? 4 : 2);
 
-		dlog(", vttbr_el2=0x%x", read_msr(vttbr_el2));
-		dlog("\n");
+		/* Call the platform-independent handler. */
+		if (vcpu_handle_page_fault(vcpu, &info)) {
+			return NULL;
+		}
 		break;
 
 	case 0x17: /* EC = 010111, SMC instruction. */
diff --git a/src/cpu.c b/src/cpu.c
index 03e0074..5db709c 100644
--- a/src/cpu.c
+++ b/src/cpu.c
@@ -160,3 +160,74 @@
 {
 	return vcpu - vcpu->vm->vcpus;
 }
+
+/**
+ * Handles the page fault described by `f` for the vcpu `current`, classifying
+ * it as legitimate or spurious against the VM's page table while holding the
+ * VM lock. A fault that is not recovered from is logged before returning.
+ *
+ * Returns true if the caller should resume the current vcpu, or false if its VM
+ * should be aborted.
+ */
+bool vcpu_handle_page_fault(const struct vcpu *current,
+			    struct vcpu_fault_info *f)
+{
+	struct vm *vm = current->vm;
+	ipaddr_t second_addr;
+	bool second;
+	int mode;
+	int mask = f->mode | MM_MODE_INVALID;
+	bool ret = false;
+
+	/* We can't recover if we don't know the size. */
+	if (f->size == 0) {
+		goto exit;
+	}
+
+	sl_lock(&vm->lock);
+
+	/*
+	 * The fault is legitimate if the lookup fails, or if the mode found
+	 * lacks the attempted access bits or has MM_MODE_INVALID set.
+	 */
+	if (!mm_vm_get_mode(&vm->ptable, f->ipaddr, ipa_add(f->ipaddr, 1),
+			    &mode) ||
+	    (mode & mask) != f->mode) {
+		goto exit_unlock;
+	}
+
+	/*
+	 * Do the same mode check on the page holding the last byte of the
+	 * access, if the access straddles two pages.
+	 */
+	second_addr = ipa_add(f->ipaddr, f->size - 1);
+	second = (ipa_addr(f->ipaddr) >> PAGE_BITS) !=
+		 (ipa_addr(second_addr) >> PAGE_BITS);
+	if (second) {
+		if (!mm_vm_get_mode(&vm->ptable, second_addr,
+				    ipa_add(second_addr, 1), &mode) ||
+		    (mode & mask) != f->mode) {
+			goto exit_unlock;
+		}
+	}
+
+	/*
+	 * This is a spurious fault, likely because another CPU is updating the
+	 * page table. It is responsible for issuing global tlb invalidations
+	 * while holding the VM lock, so we don't need to do anything else to
+	 * recover from it. (Acquiring/releasing the lock ensured that the
+	 * invalidations have completed.)
+	 */
+
+	ret = true;
+
+exit_unlock:
+	sl_unlock(&vm->lock);
+exit:
+	if (!ret) {
+		dlog("Stage-2 page fault: pc=0x%x, vmid=%u, vcpu=%u, "
+		     "vaddr=0x%x, ipaddr=0x%x, mode=0x%x, size=%u\n",
+		     f->pc, vm->id, vcpu_index(current), f->vaddr, f->ipaddr,
+		     f->mode, f->size);
+	}
+	return ret;
+}