Handle spurious page faults due to break-before-make.

Also add a test that triggers spurious faults; it fails without this
fix but passes with it.

Change-Id: I30c591d87c5278a8bb4ed4ec992544f751204d90
diff --git a/inc/hf/cpu.h b/inc/hf/cpu.h
index 3b90b13..4812abf 100644
--- a/inc/hf/cpu.h
+++ b/inc/hf/cpu.h
@@ -68,6 +68,14 @@
 	bool force;
 };
 
+struct vcpu_fault_info {
+	ipaddr_t ipaddr;
+	vaddr_t vaddr;
+	vaddr_t pc;
+	int mode;
+	uint8_t size;
+};
+
 struct vcpu {
 	struct spinlock lock;
 	enum vcpu_state state;
@@ -127,3 +135,6 @@
 void vcpu_on(struct vcpu *vcpu);
 void vcpu_off(struct vcpu *vcpu);
 size_t vcpu_index(const struct vcpu *vcpu);
+
+bool vcpu_handle_page_fault(const struct vcpu *current,
+			    struct vcpu_fault_info *f);
diff --git a/src/arch/aarch64/handler.c b/src/arch/aarch64/handler.c
index df7ade3..1ceb04d 100644
--- a/src/arch/aarch64/handler.c
+++ b/src/arch/aarch64/handler.c
@@ -422,10 +422,44 @@
 	return api_abort(current());
 }
 
+/**
+ * Initialises a fault info structure. It assumes that an FnV bit exists at
+ * bit offset 10 of the ESR, and that it is only valid when the bottom 6 bits of
+ * the ESR (the fault status code) are 010000; this is the case for both
+ * instruction and data aborts, but not necessarily for other exception reasons.
+ */
+static struct vcpu_fault_info fault_info_init(uintreg_t esr,
+					      const struct vcpu *vcpu, int mode,
+					      uint8_t size)
+{
+	uint32_t fsc = esr & 0x3f;
+	struct vcpu_fault_info r;
+
+	r.mode = mode;
+	r.size = size;
+	r.pc = va_init(vcpu->regs.pc);
+
+	/*
+	 * Check the FnV bit, which is only valid if dfsc/ifsc is 010000. It
+	 * indicates that we cannot rely on far_el2.
+	 */
+	if (fsc == 0x10 && esr & (1u << 10)) {
+		r.vaddr = va_init(0);
+		r.ipaddr = ipa_init(read_msr(hpfar_el2) << 8);
+	} else {
+		r.vaddr = va_init(read_msr(far_el2));
+		r.ipaddr = ipa_init((read_msr(hpfar_el2) << 8) |
+				    (read_msr(far_el2) & (PAGE_SIZE - 1)));
+	}
+
+	return r;
+}
+
 struct vcpu *sync_lower_exception(uintreg_t esr)
 {
 	struct vcpu *vcpu = current();
 	int32_t ret;
+	struct vcpu_fault_info info;
 
 	switch (esr >> 26) {
 	case 0x01: /* EC = 000001, WFI or WFE. */
@@ -438,34 +472,30 @@
 		return api_wait_for_interrupt(vcpu);
 
 	case 0x24: /* EC = 100100, Data abort. */
-		dlog("Lower data abort: pc=0x%x, esr=0x%x, ec=0x%x, vmid=%u, "
-		     "vcpu=%u",
-		     vcpu->regs.pc, esr, esr >> 26, vcpu->vm->id,
-		     vcpu_index(vcpu));
-		if (!(esr & (1u << 10))) { /* Check FnV bit. */
-			dlog(", far=0x%x, hpfar=0x%x", read_msr(far_el2),
-			     read_msr(hpfar_el2) << 8);
-		} else {
-			dlog(", far=invalid");
-		}
+		/*
+		 * Determine the size based on the SAS bits, which are only
+		 * valid if the ISV bit is set. The WnR bit is used to decide
+		 * if it's a read or write.
+		 */
+		info = fault_info_init(
+			esr, vcpu, (esr & (1u << 6)) ? MM_MODE_W : MM_MODE_R,
+			(esr & (1u << 24)) ? (1u << ((esr >> 22) & 0x3)) : 0);
 
-		dlog("\n");
+		/* Call the platform-independent handler. */
+		if (vcpu_handle_page_fault(vcpu, &info)) {
+			return NULL;
+		}
 		break;
 
 	case 0x20: /* EC = 100000, Instruction abort. */
-		dlog("Lower instruction abort: pc=0x%x, esr=0x%x, ec=0x%x, "
-		     "vmdid=%u, vcpu=%u",
-		     vcpu->regs.pc, esr, esr >> 26, vcpu->vm->id,
-		     vcpu_index(vcpu));
-		if (!(esr & (1u << 10))) { /* Check FnV bit. */
-			dlog(", far=0x%x, hpfar=0x%x", read_msr(far_el2),
-			     read_msr(hpfar_el2) << 8);
-		} else {
-			dlog(", far=invalid");
-		}
+		/* Determine the size based on the IL bit. */
+		info = fault_info_init(esr, vcpu, MM_MODE_X,
+				       (esr & (1u << 25)) ? 4 : 2);
 
-		dlog(", vttbr_el2=0x%x", read_msr(vttbr_el2));
-		dlog("\n");
+		/* Call the platform-independent handler. */
+		if (vcpu_handle_page_fault(vcpu, &info)) {
+			return NULL;
+		}
 		break;
 
 	case 0x17: /* EC = 010111, SMC instruction. */
diff --git a/src/cpu.c b/src/cpu.c
index 03e0074..5db709c 100644
--- a/src/cpu.c
+++ b/src/cpu.c
@@ -160,3 +160,74 @@
 {
 	return vcpu - vcpu->vm->vcpus;
 }
+
+/**
+ * Handles a page fault. It does so by determining if it's a legitimate or
+ * spurious fault, and recovering from the latter.
+ *
+ * Returns true if the caller should resume the current vcpu, or false if its VM
+ * should be aborted.
+ */
+bool vcpu_handle_page_fault(const struct vcpu *current,
+			    struct vcpu_fault_info *f)
+{
+	struct vm *vm = current->vm;
+	ipaddr_t second_addr;
+	bool second;
+	int mode;
+	int mask = f->mode | MM_MODE_INVALID;
+	bool ret = false;
+
+	/* We can't recover if we don't know the size. */
+	if (f->size == 0) {
+		goto exit;
+	}
+
+	sl_lock(&vm->lock);
+
+	/*
+	 * Check if this is a legitimate fault, i.e., if the page table doesn't
+	 * allow the access attempted by the VM.
+	 */
+	if (!mm_vm_get_mode(&vm->ptable, f->ipaddr, ipa_add(f->ipaddr, 1),
+			    &mode) ||
+	    (mode & mask) != f->mode) {
+		goto exit_unlock;
+	}
+
+	/*
+	 * Do the same mode check on the second page, if the fault straddles two
+	 * pages.
+	 */
+	second_addr = ipa_add(f->ipaddr, f->size - 1);
+	second = (ipa_addr(f->ipaddr) >> PAGE_BITS) !=
+		 (ipa_addr(second_addr) >> PAGE_BITS);
+	if (second) {
+		if (!mm_vm_get_mode(&vm->ptable, second_addr,
+				    ipa_add(second_addr, 1), &mode) ||
+		    (mode & mask) != f->mode) {
+			goto exit_unlock;
+		}
+	}
+
+	/*
+	 * This is a spurious fault, likely because another CPU is updating the
+	 * page table. It is responsible for issuing global tlb invalidations
+	 * while holding the VM lock, so we don't need to do anything else to
+	 * recover from it. (Acquiring/releasing the lock ensured that the
+	 * invalidations have completed.)
+	 */
+
+	ret = true;
+
+exit_unlock:
+	sl_unlock(&vm->lock);
+exit:
+	if (!ret) {
+		dlog("Stage-2 page fault: pc=0x%x, vmid=%u, vcpu=%u, "
+		     "vaddr=0x%x, ipaddr=0x%x, mode=0x%x, size=%u\n",
+		     f->pc, vm->id, vcpu_index(current), f->vaddr, f->ipaddr,
+		     f->mode, f->size);
+	}
+	return ret;
+}
diff --git a/test/vmapi/primary_only/BUILD.gn b/test/vmapi/primary_only/BUILD.gn
index 3039f37..f98f400 100644
--- a/test/vmapi/primary_only/BUILD.gn
+++ b/test/vmapi/primary_only/BUILD.gn
@@ -19,6 +19,7 @@
   testonly = true
 
   sources = [
+    "faults.c",
     "primary_only.c",
   ]
 
diff --git a/test/vmapi/primary_only/faults.c b/test/vmapi/primary_only/faults.c
new file mode 100644
index 0000000..0f6b708
--- /dev/null
+++ b/test/vmapi/primary_only/faults.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2019 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdalign.h>
+
+#include "hf/arch/vm/power_mgmt.h"
+
+#include "hf/mm.h"
+#include "hf/spinlock.h"
+
+#include "vmapi/hf/call.h"
+
+#include "hftest.h"
+
+alignas(PAGE_SIZE) static char tx[PAGE_SIZE];
+alignas(PAGE_SIZE) static char rx[PAGE_SIZE];
+
+struct state {
+	volatile bool done;
+	struct spinlock lock;
+};
+
+/**
+ * Releases the lock passed in, then spins reading the rx buffer.
+ */
+static void rx_reader(uintptr_t arg)
+{
+	struct state *s = (struct state *)arg;
+	sl_unlock(&s->lock);
+
+	while (!s->done) {
+		*(volatile char *)(&rx[0]);
+	}
+
+	sl_unlock(&s->lock);
+}
+
+/**
+ * Forces a spurious fault and checks that Hafnium recovers from it.
+ */
+TEST(faults, spurious_due_to_configure)
+{
+	struct state s;
+	alignas(4096) static uint8_t other_stack[4096];
+
+	sl_init(&s.lock);
+	s.done = false;
+
+	/* Start secondary cpu while holding lock. */
+	sl_lock(&s.lock);
+	EXPECT_EQ(cpu_start(1, other_stack, sizeof(other_stack), rx_reader,
+			    (uintptr_t)&s),
+		  true);
+
+	/* Wait for CPU to release the lock. */
+	sl_lock(&s.lock);
+
+	/* Configure the VM's buffers. */
+	EXPECT_EQ(hf_vm_configure((hf_ipaddr_t)&tx[0], (hf_ipaddr_t)&rx[0]), 0);
+
+	/* Tell other CPU to stop and wait for it. */
+	s.done = true;
+	sl_lock(&s.lock);
+}
diff --git a/test/vmapi/primary_only/primary_only.c b/test/vmapi/primary_only/primary_only.c
index d178517..d29943d 100644
--- a/test/vmapi/primary_only/primary_only.c
+++ b/test/vmapi/primary_only/primary_only.c
@@ -111,7 +111,7 @@
 TEST(cpus, start)
 {
 	struct spinlock lock = SPINLOCK_INIT;
-	alignas(4096) static char other_stack[4096];
+	alignas(4096) static uint8_t other_stack[4096];
 
 	/* Start secondary while holding lock. */
 	sl_lock(&lock);
diff --git a/test/vmapi/primary_with_secondaries/abort.c b/test/vmapi/primary_with_secondaries/abort.c
index 60c0b5d..0cb989a 100644
--- a/test/vmapi/primary_with_secondaries/abort.c
+++ b/test/vmapi/primary_with_secondaries/abort.c
@@ -34,6 +34,20 @@
 }
 
 /**
+ * Accessing partially unmapped memory aborts the VM.
+ */
+TEST(abort, straddling_data_abort)
+{
+	struct hf_vcpu_run_return run_res;
+	struct mailbox_buffers mb = set_up_mailbox();
+
+	SERVICE_SELECT(SERVICE_VM0, "straddling_data_abort", mb.send);
+
+	run_res = hf_vcpu_run(SERVICE_VM0, 0);
+	EXPECT_EQ(run_res.code, HF_VCPU_RUN_ABORTED);
+}
+
+/**
  * Executing unmapped memory aborts the VM.
  */
 TEST(abort, instruction_abort)
@@ -46,3 +60,17 @@
 	run_res = hf_vcpu_run(SERVICE_VM0, 0);
 	EXPECT_EQ(run_res.code, HF_VCPU_RUN_ABORTED);
 }
+
+/**
+ * Executing partially unmapped memory aborts the VM.
+ */
+TEST(abort, straddling_instruction_abort)
+{
+	struct hf_vcpu_run_return run_res;
+	struct mailbox_buffers mb = set_up_mailbox();
+
+	SERVICE_SELECT(SERVICE_VM0, "straddling_instruction_abort", mb.send);
+
+	run_res = hf_vcpu_run(SERVICE_VM0, 0);
+	EXPECT_EQ(run_res.code, HF_VCPU_RUN_ABORTED);
+}
diff --git a/test/vmapi/primary_with_secondaries/services/abort.c b/test/vmapi/primary_with_secondaries/services/abort.c
index 1571a69..8652caf 100644
--- a/test/vmapi/primary_with_secondaries/services/abort.c
+++ b/test/vmapi/primary_with_secondaries/services/abort.c
@@ -21,6 +21,8 @@
 
 #include "hftest.h"
 
+alignas(PAGE_SIZE) static uint8_t pages[2 * PAGE_SIZE];
+
 TEST_SERVICE(data_abort)
 {
 	/* Not using NULL so static analysis doesn't complain. */
@@ -28,9 +30,31 @@
 	*p = 12;
 }
 
+TEST_SERVICE(straddling_data_abort)
+{
+	/* Give some memory to the primary VM so that it's unmapped. */
+	ASSERT_EQ(hf_share_memory(HF_PRIMARY_VM_ID,
+				  (hf_ipaddr_t)(&pages[PAGE_SIZE]), PAGE_SIZE,
+				  HF_MEMORY_GIVE),
+		  0);
+	*(volatile uint64_t *)(&pages[PAGE_SIZE - 6]);
+}
+
 TEST_SERVICE(instruction_abort)
 {
 	/* Not using NULL so static analysis doesn't complain. */
 	int (*f)(void) = (int (*)(void))4;
 	f();
 }
+
+TEST_SERVICE(straddling_instruction_abort)
+{
+	int (*f)(void) = (int (*)(void))(&pages[PAGE_SIZE - 2]);
+
+	/* Give some memory to the primary VM so that it's unmapped. */
+	ASSERT_EQ(hf_share_memory(HF_PRIMARY_VM_ID,
+				  (hf_ipaddr_t)(&pages[PAGE_SIZE]), PAGE_SIZE,
+				  HF_MEMORY_GIVE),
+		  0);
+	f();
+}