Export policy for selecting message recipient.

Previously this was a first-come-first-served policy, but the scheduler
may be in a position to make a better choice. This also conforms to
our intent of moving policy out of the hypervisor.

Change-Id: I8cee6ce9b976e5ed990616c896cd53ecd0f083c8
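
As an illustration of the exported policy, the sketch below shows how a
primary VM scheduler might act on the new HF_VCPU_RUN_MESSAGE return
value. pick_vcpu_for_vm() and run_vcpu() are hypothetical scheduler
helpers; only struct hf_vcpu_run_return and the run codes come from
this change.

    /* Hypothetical scheduler helpers, not part of the vmapi. */
    uint32_t pick_vcpu_for_vm(uint32_t vm_id);
    void run_vcpu(uint32_t vm_id, uint32_t vcpu_idx);

    static void handle_run_return(struct hf_vcpu_run_return ret)
    {
            if (ret.code == HF_VCPU_RUN_MESSAGE) {
                    /*
                     * A message is pending for ret.message.vm_id; the
                     * scheduler, not the hypervisor, now picks which of
                     * that VM's vCPUs runs to receive it.
                     */
                    run_vcpu(ret.message.vm_id,
                             pick_vcpu_for_vm(ret.message.vm_id));
            }
    }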
diff --git a/src/api.c b/src/api.c
index 31943ca..938d978 100644
--- a/src/api.c
+++ b/src/api.c
@@ -73,10 +73,18 @@
* If the secondary is blocked but has a timer running, sleep until the
* timer fires rather than indefinitely.
*/
- if (primary_ret.code == HF_VCPU_RUN_WAIT_FOR_INTERRUPT &&
- arch_timer_enabled_current()) {
- primary_ret.code = HF_VCPU_RUN_SLEEP;
- primary_ret.sleep.ns = arch_timer_remaining_ns_current();
+ switch (primary_ret.code) {
+ case HF_VCPU_RUN_WAIT_FOR_INTERRUPT:
+ case HF_VCPU_RUN_WAIT_FOR_MESSAGE:
+ primary_ret.sleep.ns =
+ arch_timer_enabled_current()
+ ? arch_timer_remaining_ns_current()
+ : HF_SLEEP_INDEFINITE;
+ break;
+
+ default:
+ /* Do nothing. */
+ break;
}
/* Set the return value for the primary VM's call to HF_VCPU_RUN. */
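
For the sleep hints above, a primary-side scheduler might interpret the
returned duration as follows; sleep_ns() and block_until_kicked() are
hypothetical helpers, while the run codes, the sleep.ns field and
HF_SLEEP_INDEFINITE come from this change.

    /* Hypothetical scheduler helpers, not part of the vmapi. */
    void sleep_ns(uint64_t ns);
    void block_until_kicked(void);

    static void handle_blocked(struct hf_vcpu_run_return ret)
    {
            if (ret.code != HF_VCPU_RUN_WAIT_FOR_INTERRUPT &&
                ret.code != HF_VCPU_RUN_WAIT_FOR_MESSAGE) {
                    return;
            }

            if (ret.sleep.ns == HF_SLEEP_INDEFINITE) {
                    /* No timer running: wait until woken explicitly. */
                    block_until_kicked();
            } else {
                    /* Run the vCPU again when its timer would fire. */
                    sleep_ns(ret.sleep.ns);
            }
    }
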
@@ -129,7 +137,7 @@
};
if (current->vm->id == HF_PRIMARY_VM_ID) {
- /* Noop on the primary as it makes the scheduling decisions. */
+ /* No-op on the primary as it makes the scheduling decisions. */
return NULL;
}
@@ -250,31 +258,9 @@
{
uint32_t intid_index = intid / INTERRUPT_REGISTER_BITS;
uint32_t intid_mask = 1u << (intid % INTERRUPT_REGISTER_BITS);
- bool need_vm_lock;
int64_t ret = 0;
sl_lock(&target_vcpu->lock);
- /*
- * If we need the target_vm lock we need to release the target_vcpu lock
- * first to maintain the correct order of locks. In-between releasing
- * and acquiring it again the state of the vCPU could change in such a
- * way that we don't actually need to touch the target_vm after all, but
- * that's alright: we'll take the target_vm lock anyway, but it's safe,
- * just perhaps a little slow in this unusual case. The reverse is not
- * possible: if need_vm_lock is false, we don't release the target_vcpu
- * lock until we are done, so nothing should change in such as way that
- * we need the VM lock after all.
- */
- need_vm_lock =
- (target_vcpu->interrupts.interrupt_enabled[intid_index] &
- ~target_vcpu->interrupts.interrupt_pending[intid_index] &
- intid_mask) &&
- target_vcpu->state == vcpu_state_blocked_mailbox;
- if (need_vm_lock) {
- sl_unlock(&target_vcpu->lock);
- sl_lock(&target_vm->lock);
- sl_lock(&target_vcpu->lock);
- }
/*
* We only need to change state and (maybe) trigger a virtual IRQ if it
@@ -301,43 +287,6 @@
goto out;
}
- if (target_vcpu->state == vcpu_state_blocked_interrupt) {
- target_vcpu->state = vcpu_state_ready;
- } else if (target_vcpu->state == vcpu_state_blocked_mailbox) {
- /*
- * need_vm_lock must be true if this path is taken, so if you
- * change the condition here or those leading up to it make sure
- * to update the need_vm_lock logic above to match.
- */
-
- /* Take target vCPU out of mailbox recv_waiter list. */
- /*
- * TODO: Consider using a doubly-linked list for the receive
- * waiter list to avoid the linear search here.
- */
- struct vcpu **previous_next_pointer =
- &target_vm->mailbox.recv_waiter;
- while (*previous_next_pointer != NULL &&
- *previous_next_pointer != target_vcpu) {
- /*
- * TODO(qwandor): Do we need to lock the vCPUs somehow
- * while we walk the linked list, or is the VM lock
- * enough?
- */
- previous_next_pointer =
- &(*previous_next_pointer)->mailbox_next;
- }
- if (*previous_next_pointer == NULL) {
- dlog("Target VCPU state is vcpu_state_blocked_mailbox "
- "but is not in VM mailbox waiter list. This "
- "should never happen.\n");
- } else {
- *previous_next_pointer = target_vcpu->mailbox_next;
- }
-
- target_vcpu->state = vcpu_state_ready;
- }
-
if (current->vm->id == HF_PRIMARY_VM_ID) {
/*
* If the call came from the primary VM, let it know that it
@@ -362,9 +311,6 @@
target_vcpu->interrupts.interrupt_pending[intid_index] |= intid_mask;
sl_unlock(&target_vcpu->lock);
- if (need_vm_lock) {
- sl_unlock(&target_vm->lock);
- }
return ret;
}
@@ -374,12 +320,56 @@
* value needs to be forced onto the vCPU.
*/
static bool api_vcpu_prepare_run(const struct vcpu *current, struct vcpu *vcpu,
- struct retval_state *vcpu_retval,
struct hf_vcpu_run_return *run_ret)
{
+ bool need_vm_lock;
bool ret;
- sl_lock(&vcpu->lock);
+ /*
+ * Wait until the registers become available. All locks must be
+ * released between iterations of this loop to avoid potential deadlocks
+ * if, on any path, a lock needs to be taken after taking the decision
+ * to switch context but before the registers have been saved.
+ *
+ * The VM lock is not needed in the common case, so it is taken only
+ * when it will actually be needed. This ensures there are no inter-vCPU
+ * dependencies in the common run case, keeping the performance of the
+ * sensitive context-switch path consistent.
+ */
+ for (;;) {
+ sl_lock(&vcpu->lock);
+
+ /* The VM needs to be locked to deliver mailbox messages. */
+ need_vm_lock = vcpu->state == vcpu_state_blocked_mailbox;
+ if (need_vm_lock) {
+ sl_unlock(&vcpu->lock);
+ sl_lock(&vcpu->vm->lock);
+ sl_lock(&vcpu->lock);
+ }
+
+ if (vcpu->regs_available) {
+ break;
+ }
+
+ if (vcpu->state == vcpu_state_running) {
+ /*
+ * vCPU is running on another pCPU.
+ *
+ * It's ok to not return the sleep duration here because
+ * the other physical CPU that is currently running this
+ * vCPU will return the sleep duration if needed. The default
+ * return value is HF_VCPU_RUN_WAIT_FOR_INTERRUPT, so no
+ * need to set it explicitly.
+ */
+ ret = false;
+ goto out;
+ }
+
+ sl_unlock(&vcpu->lock);
+ if (need_vm_lock) {
+ sl_unlock(&vcpu->vm->lock);
+ }
+ }
if (atomic_load_explicit(&vcpu->vm->aborting, memory_order_relaxed)) {
if (vcpu->state != vcpu_state_aborted) {
@@ -391,39 +381,37 @@
goto out;
}
- /*
- * Wait until the registers become available. Care must be taken when
- * looping on this: it shouldn't be done while holding other locks to
- * avoid deadlocks.
- */
- while (!vcpu->regs_available) {
- if (vcpu->state == vcpu_state_running) {
- /*
- * vCPU is running on another pCPU.
- *
- * It's ok to not return HF_VCPU_RUN_SLEEP here because
- * the other physical CPU that is currently running this
- * vcpu will return HF_VCPU_RUN_SLEEP if neeed. The
- * default return value is
- * HF_VCPU_RUN_WAIT_FOR_INTERRUPT, so no need to set it
- * explicitly.
- */
- ret = false;
- goto out;
- }
-
- sl_unlock(&vcpu->lock);
- sl_lock(&vcpu->lock);
- }
-
switch (vcpu->state) {
case vcpu_state_running:
case vcpu_state_off:
case vcpu_state_aborted:
ret = false;
goto out;
- case vcpu_state_blocked_interrupt:
+
case vcpu_state_blocked_mailbox:
+ /*
+ * A pending message allows the vCPU to run so the message can
+ * be delivered directly.
+ */
+ if (vcpu->vm->mailbox.state == mailbox_state_received) {
+ arch_regs_set_retval(
+ &vcpu->regs,
+ hf_mailbox_receive_return_encode((
+ struct hf_mailbox_receive_return){
+ .vm_id = vcpu->vm->mailbox.recv_from_id,
+ .size = vcpu->vm->mailbox.recv_bytes,
+ }));
+ vcpu->vm->mailbox.state = mailbox_state_read;
+ break;
+ }
+ /* Fall through. */
+ case vcpu_state_blocked_interrupt:
+ /* Allow virtual interrupts to be delivered. */
+ if (vcpu->interrupts.enabled_and_pending_count > 0) {
+ break;
+ }
+
+ /* The timer expired, so allow the interrupt to be delivered. */
if (arch_timer_pending(&vcpu->regs)) {
break;
}
@@ -433,31 +421,25 @@
* the primary which called vcpu_run.
*/
if (arch_timer_enabled(&vcpu->regs)) {
- run_ret->code = HF_VCPU_RUN_SLEEP;
+ run_ret->code =
+ vcpu->state == vcpu_state_blocked_mailbox
+ ? HF_VCPU_RUN_WAIT_FOR_MESSAGE
+ : HF_VCPU_RUN_WAIT_FOR_INTERRUPT;
run_ret->sleep.ns =
arch_timer_remaining_ns(&vcpu->regs);
}
ret = false;
goto out;
+
case vcpu_state_ready:
break;
}
- /*
- * If we made it to here then either the state was vcpu_state_ready or
- * the timer is pending, so the vCPU should run to handle the timer
- * firing.
- */
+ /* It has been decided that the vCPU should be run. */
vcpu->cpu = current->cpu;
vcpu->state = vcpu_state_running;
- /* Fetch return value to inject into vCPU if there is one. */
- *vcpu_retval = vcpu->retval;
- if (vcpu_retval->force) {
- vcpu->retval.force = false;
- }
-
/*
* Mark the registers as unavailable now that we're about to reflect
* them onto the real registers. This will also prevent another physical
@@ -469,6 +451,10 @@
out:
sl_unlock(&vcpu->lock);
+ if (need_vm_lock) {
+ sl_unlock(&vcpu->vm->lock);
+ }
+
return ret;
}
@@ -481,9 +467,9 @@
{
struct vm *vm;
struct vcpu *vcpu;
- struct retval_state vcpu_retval;
struct hf_vcpu_run_return ret = {
.code = HF_VCPU_RUN_WAIT_FOR_INTERRUPT,
+ .sleep.ns = HF_SLEEP_INDEFINITE,
};
/* Only the primary VM can switch vcpus. */
@@ -509,7 +495,7 @@
/* Update state if allowed. */
vcpu = &vm->vcpus[vcpu_idx];
- if (!api_vcpu_prepare_run(current, vcpu, &vcpu_retval, &ret)) {
+ if (!api_vcpu_prepare_run(current, vcpu, &ret)) {
goto out;
}
@@ -543,11 +529,6 @@
*/
ret.code = HF_VCPU_RUN_PREEMPTED;
- /* Update return value for the next vcpu if one was injected. */
- if (vcpu_retval.force) {
- arch_regs_set_retval(&vcpu->regs, vcpu_retval.value);
- }
-
out:
return ret;
}
@@ -754,8 +735,10 @@
struct vm *from = current->vm;
struct vm *to;
const void *from_buf;
- uint16_t vcpu;
int64_t ret;
+ struct hf_vcpu_run_return primary_ret = {
+ .code = HF_VCPU_RUN_MESSAGE,
+ };
/* Limit the size of transfer. */
if (size > HF_MAILBOX_SIZE) {
@@ -812,69 +795,24 @@
memcpy(to->mailbox.recv, from_buf, size);
to->mailbox.recv_bytes = size;
to->mailbox.recv_from_id = from->id;
- to->mailbox.state = mailbox_state_read;
+ primary_ret.message.vm_id = to->id;
+ ret = 0;
/* Messages for the primary VM are delivered directly. */
if (to->id == HF_PRIMARY_VM_ID) {
- struct hf_vcpu_run_return primary_ret = {
- .code = HF_VCPU_RUN_MESSAGE,
- .message.size = size,
- };
-
+ primary_ret.message.size = size;
+ to->mailbox.state = mailbox_state_read;
*next = api_switch_to_primary(current, primary_ret,
vcpu_state_ready);
- ret = 0;
goto out;
}
- /*
- * Try to find a vcpu to handle the message and tell the scheduler to
- * run it.
- */
- if (to->mailbox.recv_waiter == NULL) {
- /*
- * The scheduler must choose a vcpu to interrupt so it can
- * handle the message.
- */
- to->mailbox.state = mailbox_state_received;
- vcpu = HF_INVALID_VCPU;
- } else {
- struct vcpu *to_vcpu = to->mailbox.recv_waiter;
-
- /*
- * Take target vcpu out of waiter list and mark it as ready to
- * run again.
- */
- sl_lock(&to_vcpu->lock);
- to->mailbox.recv_waiter = to_vcpu->mailbox_next;
- to_vcpu->state = vcpu_state_ready;
-
- /* Return from HF_MAILBOX_RECEIVE. */
- to_vcpu->retval.force = true;
- to_vcpu->retval.value = hf_mailbox_receive_return_encode(
- (struct hf_mailbox_receive_return){
- .vm_id = to->mailbox.recv_from_id,
- .size = size,
- });
-
- sl_unlock(&to_vcpu->lock);
-
- vcpu = to_vcpu - to->vcpus;
- }
+ to->mailbox.state = mailbox_state_received;
/* Return to the primary VM directly or with a switch. */
- if (from->id == HF_PRIMARY_VM_ID) {
- ret = vcpu;
- } else {
- struct hf_vcpu_run_return primary_ret = {
- .code = HF_VCPU_RUN_WAKE_UP,
- .wake_up.vm_id = to->id,
- .wake_up.vcpu = vcpu,
- };
-
+ if (from->id != HF_PRIMARY_VM_ID) {
*next = api_switch_to_primary(current, primary_ret,
vcpu_state_ready);
- ret = 0;
}
out:
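
On the receiving side, a secondary VM now learns about a delivered
message when the scheduler decides to run one of its vCPUs. A rough
sketch of a receive loop, assuming the hf_mailbox_receive() and
hf_mailbox_clear() vmapi wrappers and a hypothetical handle_message():

    static void receive_loop(void)
    {
            for (;;) {
                    /*
                     * Blocks until a message arrives; the hypervisor
                     * reports HF_VCPU_RUN_WAIT_FOR_MESSAGE to the
                     * primary while this vCPU waits.
                     */
                    struct hf_mailbox_receive_return recv =
                            hf_mailbox_receive(true);

                    handle_message(recv.vm_id, recv.size);

                    /* Free the mailbox for the next sender. */
                    hf_mailbox_clear();
            }
    }
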
@@ -925,17 +863,10 @@
goto out;
}
- sl_lock(&current->lock);
-
- /* Push vcpu into waiter list. */
- current->mailbox_next = vm->mailbox.recv_waiter;
- vm->mailbox.recv_waiter = current;
- sl_unlock(&current->lock);
-
/* Switch back to primary vm to block. */
{
struct hf_vcpu_run_return run_return = {
- .code = HF_VCPU_RUN_WAIT_FOR_INTERRUPT,
+ .code = HF_VCPU_RUN_WAIT_FOR_MESSAGE,
};
*next = api_switch_to_primary(current, run_return,