Handle spurious page faults due to break-before-make.
Also add a test that triggers spurious faults; it fails without this
fix and passes with it.
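
For context, a minimal sketch of the break-before-make window that causes
these faults (helper names hypothetical, not Hafnium's actual API):

    sl_lock(&vm->lock);
    stage2_entry_clear(pte);       /* Break: remove the old mapping. */
    tlb_invalidate_vm(vm->id);     /* Concurrent accesses now fault. */
    stage2_entry_set(pte, attrs);  /* Make: install the new mapping. */
    sl_unlock(&vm->lock);

The new handler recovers by taking the VM lock (thereby waiting out any
in-progress update), re-checking the page table, and resuming the vCPU if
the access is in fact allowed.
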
Change-Id: I30c591d87c5278a8bb4ed4ec992544f751204d90
diff --git a/inc/hf/cpu.h b/inc/hf/cpu.h
index 3b90b13..4812abf 100644
--- a/inc/hf/cpu.h
+++ b/inc/hf/cpu.h
@@ -68,6 +68,14 @@
bool force;
};
+struct vcpu_fault_info {
+ ipaddr_t ipaddr;
+ vaddr_t vaddr;
+ vaddr_t pc;
+ int mode;
+ uint8_t size;
+};
+
struct vcpu {
struct spinlock lock;
enum vcpu_state state;
@@ -127,3 +135,6 @@
void vcpu_on(struct vcpu *vcpu);
void vcpu_off(struct vcpu *vcpu);
size_t vcpu_index(const struct vcpu *vcpu);
+
+bool vcpu_handle_page_fault(const struct vcpu *current,
+ struct vcpu_fault_info *f);
diff --git a/src/arch/aarch64/handler.c b/src/arch/aarch64/handler.c
index df7ade3..1ceb04d 100644
--- a/src/arch/aarch64/handler.c
+++ b/src/arch/aarch64/handler.c
@@ -422,10 +422,44 @@
return api_abort(current());
}
+/**
+ * Initialises a fault info structure. It assumes that an FnV bit exists at
+ * bit offset 10 of the ESR, and that it is only valid when the bottom 6 bits of
+ * the ESR (the fault status code) are 010000; this is the case for both
+ * instruction and data aborts, but not necessarily for other exception reasons.
+ */
+static struct vcpu_fault_info fault_info_init(uintreg_t esr,
+ const struct vcpu *vcpu, int mode,
+ uint8_t size)
+{
+ uint32_t fsc = esr & 0x3f;
+ struct vcpu_fault_info r;
+
+ r.mode = mode;
+ r.size = size;
+ r.pc = va_init(vcpu->regs.pc);
+
+ /*
+ * Check the FnV bit, which is only valid if dfsc/ifsc is 010000. It
+ * indicates that we cannot rely on far_el2.
+ */
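+	/*
+	 * HPFAR_EL2 holds bits [47:12] of the faulting IPA in register bits
+	 * [39:4], so shifting the register value left by 8 recovers the
+	 * page-aligned IPA.
+	 */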
+ if (fsc == 0x10 && esr & (1u << 10)) {
+ r.vaddr = va_init(0);
+ r.ipaddr = ipa_init(read_msr(hpfar_el2) << 8);
+ } else {
+ r.vaddr = va_init(read_msr(far_el2));
+ r.ipaddr = ipa_init((read_msr(hpfar_el2) << 8) |
+ (read_msr(far_el2) & (PAGE_SIZE - 1)));
+ }
+
+ return r;
+}
+
struct vcpu *sync_lower_exception(uintreg_t esr)
{
struct vcpu *vcpu = current();
int32_t ret;
+ struct vcpu_fault_info info;
switch (esr >> 26) {
case 0x01: /* EC = 000001, WFI or WFE. */
@@ -438,34 +472,30 @@
return api_wait_for_interrupt(vcpu);
case 0x24: /* EC = 100100, Data abort. */
- dlog("Lower data abort: pc=0x%x, esr=0x%x, ec=0x%x, vmid=%u, "
- "vcpu=%u",
- vcpu->regs.pc, esr, esr >> 26, vcpu->vm->id,
- vcpu_index(vcpu));
- if (!(esr & (1u << 10))) { /* Check FnV bit. */
- dlog(", far=0x%x, hpfar=0x%x", read_msr(far_el2),
- read_msr(hpfar_el2) << 8);
- } else {
- dlog(", far=invalid");
- }
+		/*
+		 * Determine the size from the SAS bits, which are only valid
+		 * when the ISV bit is set; otherwise pass 0 so the handler
+		 * treats the fault as unrecoverable. The WnR bit decides
+		 * whether the access was a read or a write.
+		 */
+ info = fault_info_init(
+ esr, vcpu, (esr & (1u << 6)) ? MM_MODE_W : MM_MODE_R,
+ (esr & (1u << 24)) ? (1u << ((esr >> 22) & 0x3)) : 0);
- dlog("\n");
+ /* Call the platform-independent handler. */
+ if (vcpu_handle_page_fault(vcpu, &info)) {
+ return NULL;
+ }
break;
case 0x20: /* EC = 100000, Instruction abort. */
- dlog("Lower instruction abort: pc=0x%x, esr=0x%x, ec=0x%x, "
- "vmdid=%u, vcpu=%u",
- vcpu->regs.pc, esr, esr >> 26, vcpu->vm->id,
- vcpu_index(vcpu));
- if (!(esr & (1u << 10))) { /* Check FnV bit. */
- dlog(", far=0x%x, hpfar=0x%x", read_msr(far_el2),
- read_msr(hpfar_el2) << 8);
- } else {
- dlog(", far=invalid");
- }
+		/* Determine the size from the IL bit (32- vs. 16-bit instruction). */
+ info = fault_info_init(esr, vcpu, MM_MODE_X,
+ (esr & (1u << 25)) ? 4 : 2);
- dlog(", vttbr_el2=0x%x", read_msr(vttbr_el2));
- dlog("\n");
+ /* Call the platform-independent handler. */
+ if (vcpu_handle_page_fault(vcpu, &info)) {
+ return NULL;
+ }
break;
case 0x17: /* EC = 010111, SMC instruction. */
diff --git a/src/cpu.c b/src/cpu.c
index 03e0074..5db709c 100644
--- a/src/cpu.c
+++ b/src/cpu.c
@@ -160,3 +160,74 @@
{
return vcpu - vcpu->vm->vcpus;
}
+
+/**
+ * Handles a page fault by determining whether it is legitimate or spurious,
+ * and recovering from the latter.
+ *
+ * Returns true if the caller should resume the current vcpu, or false if its VM
+ * should be aborted.
+ */
+bool vcpu_handle_page_fault(const struct vcpu *current,
+ struct vcpu_fault_info *f)
+{
+ struct vm *vm = current->vm;
+ ipaddr_t second_addr;
+ bool second;
+ int mode;
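+	/* The mapping must allow all bits of f->mode and not be marked invalid. */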
+ int mask = f->mode | MM_MODE_INVALID;
+ bool ret = false;
+
+ /* We can't recover if we don't know the size. */
+ if (f->size == 0) {
+ goto exit;
+ }
+
+ sl_lock(&vm->lock);
+
+ /*
+ * Check if this is a legitimate fault, i.e., if the page table doesn't
+	 * allow the access attempted by the VM.
+ */
+ if (!mm_vm_get_mode(&vm->ptable, f->ipaddr, ipa_add(f->ipaddr, 1),
+ &mode) ||
+ (mode & mask) != f->mode) {
+ goto exit_unlock;
+ }
+
+ /*
+	 * Do the same mode check on the second page if the fault straddles two
+ * pages.
+ */
+ second_addr = ipa_add(f->ipaddr, f->size - 1);
+ second = (ipa_addr(f->ipaddr) >> PAGE_BITS) !=
+ (ipa_addr(second_addr) >> PAGE_BITS);
+ if (second) {
+ if (!mm_vm_get_mode(&vm->ptable, second_addr,
+ ipa_add(second_addr, 1), &mode) ||
+ (mode & mask) != f->mode) {
+ goto exit_unlock;
+ }
+ }
+
+ /*
+ * This is a spurious fault, likely because another CPU is updating the
+	 * page table. That CPU is responsible for issuing global TLB
+	 * invalidations while holding the VM lock, so we don't need to do
+	 * anything else to recover. (Acquiring and releasing the lock above
+	 * ensures that the invalidations have completed.)
+ */
+
+ ret = true;
+
+exit_unlock:
+ sl_unlock(&vm->lock);
+exit:
+ if (!ret) {
+ dlog("Stage-2 page fault: pc=0x%x, vmid=%u, vcpu=%u, "
+ "vaddr=0x%x, ipaddr=0x%x, mode=0x%x, size=%u\n",
+ f->pc, vm->id, vcpu_index(current), f->vaddr, f->ipaddr,
+ f->mode, f->size);
+ }
+ return ret;
+}
diff --git a/test/vmapi/primary_only/BUILD.gn b/test/vmapi/primary_only/BUILD.gn
index 3039f37..f98f400 100644
--- a/test/vmapi/primary_only/BUILD.gn
+++ b/test/vmapi/primary_only/BUILD.gn
@@ -19,6 +19,7 @@
testonly = true
sources = [
+ "faults.c",
"primary_only.c",
]
diff --git a/test/vmapi/primary_only/faults.c b/test/vmapi/primary_only/faults.c
new file mode 100644
index 0000000..0f6b708
--- /dev/null
+++ b/test/vmapi/primary_only/faults.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2019 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdalign.h>
+
+#include "hf/arch/vm/power_mgmt.h"
+
+#include "hf/mm.h"
+#include "hf/spinlock.h"
+
+#include "vmapi/hf/call.h"
+
+#include "hftest.h"
+
+alignas(PAGE_SIZE) static char tx[PAGE_SIZE];
+alignas(PAGE_SIZE) static char rx[PAGE_SIZE];
+
+struct state {
+ volatile bool done;
+ struct spinlock lock;
+};
+
+/**
+ * Releases the lock passed in, then spins reading the rx buffer.
+ */
+static void rx_reader(uintptr_t arg)
+{
+ struct state *s = (struct state *)arg;
+ sl_unlock(&s->lock);
+
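+	/*
+	 * Each read may fault spuriously while the primary concurrently
+	 * remaps the buffer; Hafnium must recover transparently.
+	 */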
+ while (!s->done) {
+ *(volatile char *)(&rx[0]);
+ }
+
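+	/* The second unlock signals the main thread that we are done. */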
+ sl_unlock(&s->lock);
+}
+
+/**
+ * Forces a spurious fault and checks that Hafnium recovers from it. A
+ * second CPU keeps reading the rx buffer while hf_vm_configure remaps it,
+ * hitting the break-before-make window.
+ */
+TEST(faults, spurious_due_to_configure)
+{
+ struct state s;
+ alignas(4096) static uint8_t other_stack[4096];
+
+ sl_init(&s.lock);
+ s.done = false;
+
+	/* Start the secondary CPU while holding the lock. */
+ sl_lock(&s.lock);
+ EXPECT_EQ(cpu_start(1, other_stack, sizeof(other_stack), rx_reader,
+ (uintptr_t)&s),
+ true);
+
+ /* Wait for CPU to release the lock. */
+ sl_lock(&s.lock);
+
+ /* Configure the VM's buffers. */
+ EXPECT_EQ(hf_vm_configure((hf_ipaddr_t)&tx[0], (hf_ipaddr_t)&rx[0]), 0);
+
+ /* Tell other CPU to stop and wait for it. */
+ s.done = true;
+ sl_lock(&s.lock);
+}
diff --git a/test/vmapi/primary_only/primary_only.c b/test/vmapi/primary_only/primary_only.c
index d178517..d29943d 100644
--- a/test/vmapi/primary_only/primary_only.c
+++ b/test/vmapi/primary_only/primary_only.c
@@ -111,7 +111,7 @@
TEST(cpus, start)
{
struct spinlock lock = SPINLOCK_INIT;
- alignas(4096) static char other_stack[4096];
+ alignas(4096) static uint8_t other_stack[4096];
/* Start secondary while holding lock. */
sl_lock(&lock);
diff --git a/test/vmapi/primary_with_secondaries/abort.c b/test/vmapi/primary_with_secondaries/abort.c
index 60c0b5d..0cb989a 100644
--- a/test/vmapi/primary_with_secondaries/abort.c
+++ b/test/vmapi/primary_with_secondaries/abort.c
@@ -34,6 +34,20 @@
}
/**
+ * Accessing partially unmapped memory aborts the VM.
+ */
+TEST(abort, straddling_data_abort)
+{
+ struct hf_vcpu_run_return run_res;
+ struct mailbox_buffers mb = set_up_mailbox();
+
+ SERVICE_SELECT(SERVICE_VM0, "straddling_data_abort", mb.send);
+
+ run_res = hf_vcpu_run(SERVICE_VM0, 0);
+ EXPECT_EQ(run_res.code, HF_VCPU_RUN_ABORTED);
+}
+
+/**
* Executing unmapped memory aborts the VM.
*/
TEST(abort, instruction_abort)
@@ -46,3 +60,17 @@
run_res = hf_vcpu_run(SERVICE_VM0, 0);
EXPECT_EQ(run_res.code, HF_VCPU_RUN_ABORTED);
}
+
+/**
+ * Executing partially unmapped memory aborts the VM.
+ */
+TEST(abort, straddling_instruction_abort)
+{
+ struct hf_vcpu_run_return run_res;
+ struct mailbox_buffers mb = set_up_mailbox();
+
+ SERVICE_SELECT(SERVICE_VM0, "straddling_instruction_abort", mb.send);
+
+ run_res = hf_vcpu_run(SERVICE_VM0, 0);
+ EXPECT_EQ(run_res.code, HF_VCPU_RUN_ABORTED);
+}
diff --git a/test/vmapi/primary_with_secondaries/services/abort.c b/test/vmapi/primary_with_secondaries/services/abort.c
index 1571a69..8652caf 100644
--- a/test/vmapi/primary_with_secondaries/services/abort.c
+++ b/test/vmapi/primary_with_secondaries/services/abort.c
@@ -21,6 +21,8 @@
#include "hftest.h"
+alignas(PAGE_SIZE) static uint8_t pages[2 * PAGE_SIZE];
+
TEST_SERVICE(data_abort)
{
/* Not using NULL so static analysis doesn't complain. */
@@ -28,9 +30,31 @@
*p = 12;
}
+TEST_SERVICE(straddling_data_abort)
+{
+	/*
+	 * Give the second page to the primary VM so that it is no longer
+	 * mapped in this VM.
+	 */
+ ASSERT_EQ(hf_share_memory(HF_PRIMARY_VM_ID,
+ (hf_ipaddr_t)(&pages[PAGE_SIZE]), PAGE_SIZE,
+ HF_MEMORY_GIVE),
+ 0);
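+	/*
+	 * An 8-byte read starting 6 bytes before the page boundary straddles
+	 * into the now-unmapped page.
+	 */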
+ *(volatile uint64_t *)(&pages[PAGE_SIZE - 6]);
+}
+
TEST_SERVICE(instruction_abort)
{
/* Not using NULL so static analysis doesn't complain. */
int (*f)(void) = (int (*)(void))4;
f();
}
+
+TEST_SERVICE(straddling_instruction_abort)
+{
+ int (*f)(void) = (int (*)(void))(&pages[PAGE_SIZE - 2]);
+
+	/*
+	 * Give the second page to the primary VM so that it is no longer
+	 * mapped in this VM.
+	 */
+ ASSERT_EQ(hf_share_memory(HF_PRIMARY_VM_ID,
+ (hf_ipaddr_t)(&pages[PAGE_SIZE]), PAGE_SIZE,
+ HF_MEMORY_GIVE),
+ 0);
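+	/*
+	 * The target is 2 bytes before the page boundary, so the instruction
+	 * fetch straddles into the now-unmapped page.
+	 */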
+ f();
+}